• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Clinical-Genomics / demultiplexing / 4627485829

pending completion
4627485829

push

github-actions

karlnyr
remove conda stuff, expand aliases

501 of 941 relevant lines covered (53.24%)

0.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

35.45
/demux/utils/novaseq_samplesheet.py
1
"""
2
    Create a samplesheet for NovaSeq flowcells
3
"""
4
import csv
1✔
5
import sys
1✔
6
from distutils.version import StrictVersion, LooseVersion
1✔
7
from typing import Any, Dict, List, Union
1✔
8

9
from cglims.api import ClinicalLims
1✔
10
from demux.constants.constants import COMMA, DASH, SPACE
1✔
11
from demux.constants.samplesheet import (
1✔
12
    NIPT_INDEX_LENGTH,
13
    PARAMETER_TO_VERSION,
14
    REV_COMP_CONTROL_SOFTWARE_VERSION,
15
    REV_COMP_REAGENT_KIT_VERSION,
16
)
17
from demux.exc import NoValidReagentKitFound
1✔
18

19
from .runparameters import NovaseqRunParameters
1✔
20
from .samplesheet import Samplesheet
1✔
21

22

23
class CreateNovaseqSamplesheet:
    """Create a raw sample sheet for NovaSeq flowcells.

    The sample rows are fetched from LIMS for one flowcell, optionally
    extended with dummy index samples, filtered down to dual-index samples,
    adapted (padding / reverse complement of the second index) according to
    the run parameters, and finally rendered as delimited text.
    """

    # Keys read from every sample row, in the order they are written out.
    LIMS_KEYS = [
        "fcid",
        "lane",
        "sample_id",
        "sample_ref",
        "index",
        "index2",
        "sample_name",
        "control",
        "recipe",
        "operator",
        "project",
    ]

    def __init__(
        self,
        dummy_indexes_file: str,
        flowcell: str,
        lims_config: dict,
        pad: bool,
        runs_dir: str,
    ):
        """Store the settings and set up the LIMS and run parameter helpers.

        Args:
            dummy_indexes_file: path to the CSV file holding the dummy indexes
            flowcell: flowcell identifier, used for the LIMS lookup and the
                ``fcid`` column of dummy samples
            lims_config: keyword arguments forwarded to ``ClinicalLims``
            pad: whether indexes of length 8 should be passed through
                ``pad_and_rc_indexes``
            runs_dir: passed to ``NovaseqRunParameters`` together with the
                flowcell
        """
        self.dummy_indexes_file = dummy_indexes_file
        self.flowcell = flowcell
        self.lims_api = ClinicalLims(**lims_config)
        self.pad = pad
        self.runparameters = NovaseqRunParameters(self.flowcell, runs_dir)

    @property
    def control_software_version(self) -> StrictVersion:
        """Returns control software version in StrictVersion format"""
        return StrictVersion(self.runparameters.control_software_version)

    @property
    def reagent_kit_version(self) -> LooseVersion:
        """Derives the reagent kit version from the run parameters.

        Raises:
            NoValidReagentKitFound: when the value in the run parameters is
                not a key of PARAMETER_TO_VERSION
        """
        reagent_kit_version = self.runparameters.reagent_kit_version
        if reagent_kit_version not in PARAMETER_TO_VERSION:
            raise NoValidReagentKitFound(
                f"Expected reagent kit version {', '.join(PARAMETER_TO_VERSION)}. "
                f"Found {reagent_kit_version} instead. Exiting",
            )

        return LooseVersion(PARAMETER_TO_VERSION[reagent_kit_version])

    @property
    def header(self) -> list:
        """Create the sample sheet header from the values of Samplesheet.header_map"""
        return list(Samplesheet.header_map.values())

    def get_dummy_sample_information(
        self, dummy_index: str, lane: Union[str, int], name: str
    ) -> Dict[str, Union[str, int]]:
        """Constructs and returns a dummy sample in novaseq samplesheet format.

        Args:
            dummy_index: index stored in the ``index`` column
            lane: lane the dummy sample is added to
            name: dummy sample name; spaces and parentheses are replaced by
                dashes to form the ``sample_id``
        """
        return {
            "control": "N",
            "description": "",
            "fcid": self.flowcell,
            "index": dummy_index,
            "index2": "",
            "lane": lane,
            "operator": "script",
            "project": "indexcheck",
            "recipe": "R1",
            "sample_id": name.replace(" ", "-").replace("(", "-").replace(")", "-"),
            "sample_name": "indexcheck",
            "sample_ref": "hg19",
        }

    @staticmethod
    def get_project_name(project: str, delimiter=SPACE) -> str:
        """Only keeps the first part of the project name (text before the first delimiter)"""
        return project.split(delimiter)[0]

    @staticmethod
    def get_reverse_complement_dna_seq(dna: str) -> str:
        """Generates the reverse complement of a DNA sequence.

        Only the upper case bases A, C, G and T are known; any other
        character raises a KeyError.
        """
        complement = {"A": "T", "C": "G", "G": "C", "T": "A"}
        return "".join(complement[base] for base in reversed(dna))

    @staticmethod
    def is_dual_index(index: str, delimiter=DASH) -> bool:
        """Determines if an index in the raw samplesheet is dual index or not"""
        return delimiter in index

    @staticmethod
    def is_dummy_sample_in_samplesheet(dummy_index: str, sample_indexes: list) -> bool:
        """Determines if a dummy sample is already present in the samplesheet.

        A dummy index counts as present when any sample index starts with it.
        """
        return any(
            sample_index.startswith(dummy_index) for sample_index in sample_indexes
        )

    @staticmethod
    def get_sample_indexes_in_lane(
        lane: Union[str, int], raw_samplesheet: List[Dict]
    ) -> list:
        """Returns all sample indexes in a given lane"""
        return [sample["index"] for sample in raw_samplesheet if sample["lane"] == lane]

    def replace_project_with_lims_sample_name(
        self, raw_samplesheet: List[Dict]
    ) -> List[Dict]:
        """Replaces the project in the SampleName column with the LIMS Sample Name.

        The rows are updated in place; one LIMS lookup is made per sample.
        """
        for sample in raw_samplesheet:
            sample["sample_name"] = self.lims_api.sample(sample["sample_id"]).name
        return raw_samplesheet

    def is_reverse_complement(self) -> bool:
        """
        If the run used NovaSeq control software version REV_COMP_CONTROL_SOFTWARE_VERSION or later and reagent
        kit version REV_COMP_REAGENT_KIT_VERSION or later, the second index should be the reverse complement
        """
        return (
            self.control_software_version >= REV_COMP_CONTROL_SOFTWARE_VERSION
            and self.reagent_kit_version >= REV_COMP_REAGENT_KIT_VERSION
        )

    def is_nipt_samplesheet(self) -> bool:
        """Determines if a sample sheet is for NIPT demultiplexing, based on the index length in the run parameters"""
        return self.runparameters.index_reads == NIPT_INDEX_LENGTH

    def add_dummy_indexes(self, raw_samplesheet: List[Dict]) -> List[Dict]:
        """Add all dummy indexes to raw sample sheet. Dummy indexes are used to check for index
        contamination.

        Every row of the dummy index file must hold a sample name and an
        index. For each lane in the sample sheet, a dummy sample is added for
        every dummy index that no sample index in that lane starts with. The
        list is extended in place and returned.
        """
        # newline="" is what the csv module expects from the file object;
        # the file is closed as soon as the rows have been read.
        with open(self.dummy_indexes_file, newline="") as csv_file:
            # Blank lines come back as empty rows; skip them instead of
            # failing on the tuple unpacking below.
            dummy_samples = [
                row for row in csv.reader(csv_file, delimiter=COMMA) if row
            ]

        # Unique lanes in first-seen order, so the generated rows come out in
        # the same order on every run.
        lanes = list(dict.fromkeys(sample["lane"] for sample in raw_samplesheet))

        new_dummy_samples = []
        for lane in lanes:
            sample_indexes = self.get_sample_indexes_in_lane(lane, raw_samplesheet)
            new_dummy_samples.extend(
                self.get_dummy_sample_information(dummy_index, lane, sample_name)
                for sample_name, dummy_index in dummy_samples
                if not self.is_dummy_sample_in_samplesheet(dummy_index, sample_indexes)
            )

        raw_samplesheet.extend(new_dummy_samples)
        return raw_samplesheet

    def remove_unwanted_indexes(self, raw_samplesheet: List[Dict]) -> List[Dict]:
        """Filter out samples with a single index; only dual index samples are kept"""
        return [line for line in raw_samplesheet if self.is_dual_index(line["index"])]

    def adapt_indexes(self, raw_samplesheet: List[Dict]) -> List[Dict]:
        """Splits the dual index of every sample into ``index`` and ``index2`` and adapts them.

        - padding enabled and first index of length 8: handled by
          ``pad_and_rc_indexes``
        - otherwise, second index of length 10: the second index is reverse
          complemented when ``is_reverse_complement()`` is true
        - otherwise: both indexes are stored unchanged

        The rows are updated in place and the same list is returned.
        """
        is_reverse_complement = self.is_reverse_complement()

        for line in raw_samplesheet:
            # Same delimiter as is_dual_index uses; exactly two parts expected.
            index1, index2 = line["index"].split(DASH)
            if self.pad and len(index1) == 8:
                line["index"], line["index2"] = self.pad_and_rc_indexes(
                    index1, index2, is_reverse_complement
                )
            elif len(index2) == 10:
                line["index"] = index1
                line["index2"] = (
                    self.get_reverse_complement_dna_seq(index2)
                    if is_reverse_complement
                    else index2
                )
            else:
                # NOTE(review): index2 is never reverse complemented in this
                # branch, even when is_reverse_complement is true - confirm
                # that this is intended.
                line["index"], line["index2"] = index1, index2

        return raw_samplesheet

    def pad_and_rc_indexes(
        self, index1: str, index2: str, is_reverse_complement: bool
    ) -> tuple:
        """Pads and reverse complements indexes.

        With 8 index reads only the second index may change (reverse
        complement). With 10 index reads "AT" is appended to the first index
        and the second index is padded with "AC" (prepended before taking the
        reverse complement, appended otherwise). For any other number of
        index reads both indexes are returned unchanged.
        """
        index_reads = self.runparameters.index_reads

        if index_reads == 8:
            if is_reverse_complement:
                index2 = self.get_reverse_complement_dna_seq(index2)
        elif index_reads == 10:
            index1 += "AT"
            index2 = (
                self.get_reverse_complement_dna_seq("AC" + index2)
                if is_reverse_complement
                else index2 + "AC"
            )

        return index1, index2

    def get_raw_samplesheet(self) -> List[Dict]:
        """Fetch the sample sheet rows for the flowcell from LIMS.

        Raises:
            SystemExit: with a message on stderr and a non-zero exit status
                when LIMS returns no rows for the flowcell
        """
        raw_samplesheet = list(self.lims_api.samplesheet(self.flowcell))
        if not raw_samplesheet:
            # A string argument is written to stderr and gives exit status 1,
            # so calling scripts can detect the failure.
            sys.exit(f"Samplesheet for {self.flowcell} not found in LIMS!")
        return raw_samplesheet

    def construct_samplesheet(self, delimiter=COMMA, end="\n") -> str:
        """Construct the sample sheet.

        Args:
            delimiter: separator between the columns of one row
            end: separator between rows

        Returns:
            The header followed by one row per sample, holding the LIMS_KEYS
            values of that sample.
        """
        demux_samplesheet = [delimiter.join(self.header)]

        raw_samplesheet = self.get_raw_samplesheet()
        raw_samplesheet = self.replace_project_with_lims_sample_name(raw_samplesheet)
        if not self.is_nipt_samplesheet():
            raw_samplesheet = self.add_dummy_indexes(raw_samplesheet)
        raw_samplesheet = self.remove_unwanted_indexes(raw_samplesheet)
        raw_samplesheet = self.adapt_indexes(raw_samplesheet)

        for line in raw_samplesheet:
            # fix the project content
            line["project"] = self.get_project_name(line["project"])
            demux_samplesheet.append(
                delimiter.join(str(line[lims_key]) for lims_key in self.LIMS_KEYS)
            )

        return end.join(demux_samplesheet)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc