• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sequana / sequana_pipetools / 4075608515

pending completion
4075608515

Pull #51

github

GitHub
Merge 173da2f1c into fd3b61e95
Pull Request #51: Fix CI (remove conda)

4 of 4 new or added lines in 2 files covered. (100.0%)

1380 of 1520 relevant lines covered (90.79%)

2.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.24
/sequana_pipetools/snaketools/module.py
1
#
2
#  This file is part of Sequana software
3
#
4
#  Copyright (c) 2016-2021 - Sequana Dev Team (https://sequana.readthedocs.io)
5
#
6
#  Distributed under the terms of the 3-clause BSD license.
7
#  The full license is in the LICENSE file, distributed with this software.
8
#
9
#  Website:       https://github.com/sequana/sequana
10
#  Documentation: http://sequana.readthedocs.io
11
#  Contributors:  https://github.com/sequana/sequana/graphs/contributors
12
##############################################################################
13
import os
3✔
14

15
import colorlog
3✔
16
import easydev
3✔
17

18
from .module_finder import ModuleFinder
3✔
19

20
# Module-level logger, named after this module and obtained through colorlog.
logger = colorlog.getLogger(__name__)
3✔
21

22

23
class Module:
    """Data structure that holds metadata about a **Module**

    In Sequana, we provide rules and pipelines to be used with snakemake.
    Snakemake rules look like::

        rule <name>:
            :input: file1
            :output: file2
            :shell: "cp file1 file2"

    A pipeline may look like::

        include: "path_to_rule1"
        include: "path_to_rule2"
        rule all:
            input: FINAL_FILES

    Note that the pipeline includes rules by providing the path to them.

    All rules can be stored in a single directory. Similarly for pipelines.
    We decided not to use that convention. Instead, we bundle rules (and
    pipelines) in their own directories so that other files can be stored
    with them. We also consider that

        #. if the **Snakefile** includes other **Snakefile** then
           it is **Pipeline**.
        #. Otherwise it is a simple **Rule**.

    So, a **Module** in sequana's parlance is a directory that contains a
    rule or a pipeline and associated files. There is currently no strict
    conventions for rule Modules except for their own rule file. However,
    pipeline Modules should have the following files:

        - A **snakemake** file named after the directory with the extension
          **.rules**
        - A **README.rst** file in restructured text format
        - An optional config file in YAML format named config.yaml.
          Although json format is possible, we use YAML throughout
          **sequana** for consistency. Rules do not have any but pipelines
          do. So if a pipeline does not provide a config.yaml, the one found
          in ./sequana/sequana/pipelines will be used.
        - a **requirements.txt**

    .. note:: Developers who wish to include new rules should refer to the
        Developer guide.

    .. note:: it is important that module's name should be used to name
        the directory and the rule/pipeline.

    The **Modules** are stored in sequana/rules and sequana/pipelines
    directories. The modules' names cannot be duplicated.

    Example::

        pipelines/test_pipe/test_pipe.rules
        pipelines/test_pipe/README.rst
        rules/rule1/rule1.rules
        rules/rule1/README.rst

    The :class:`Module` will ease the retrieval of information linked to a
    rule or pipeline. For instance if a pipeline has a config file, its path
    can be retrieved easily::

        m = Module("quality_control")
        m.config

    This Module may be rule or pipeline, the method :meth:`is_pipeline` can
    be used to get that information.

    """

    def __init__(self, name):
        """.. rubric:: Constructor

        :param str name: the name of an available module.
        :raises ValueError: if *name* is not among the names known to the
            :class:`ModuleFinder`.

        """
        self._mf = ModuleFinder()
        # The return value is not used here.
        # NOTE(review): presumably isvalid() raises on malformed names --
        # confirm against ModuleFinder.
        self._mf.isvalid(name)

        if name not in self._mf.names:
            raise ValueError(
                """Sequana error: unknown rule or pipeline '{}'.
Check the source code at:

    https://github.com/sequana/sequana/tree/develop/sequana/pipelines and
    https://github.com/sequana/sequana/tree/develop/sequana/rules

or open a Python shell and type::

    from sequana_pipetools.snaketools.module import modules
    modules.keys()""".format(
                    name
                )
            )

        self._path = self._mf._paths[name]
        self._name = name

        # Lazily filled by the corresponding properties.
        self._snakefile = None
        self._description = None
        self._requirements = None

    def is_pipeline(self):
        """Return True if this module is a pipeline.

        A module is a pipeline when its name starts with ``pipeline:``.
        """
        return self._name.startswith("pipeline:")

    def _get_file(self, name):
        """Return the path of *name* inside the module directory, or None.

        :param str name: file name (possibly relative, e.g. ``../config.yaml``)
        :return: the joined path if it exists on disk, None otherwise.
        """
        filename = os.sep.join((self._path, name))
        if os.path.exists(filename):
            return filename
        return None

    def _first_existing(self, candidates):
        """Return the first of *candidates* found by :meth:`_get_file`, or None."""
        for candidate in candidates:
            filename = self._get_file(candidate)
            if filename:
                return filename
        return None

    def __repr__(self):
        # Built as a list of lines so that the builtin ``str`` is not shadowed.
        lines = (
            "Name: %s\n" % self._name,
            "Path: %s\n" % self.path,
            "Config: %s\n" % self.config,
            "Cluster config: %s\n" % self.cluster_config,
            "Schema for config file: %s\n" % self.schema_config,
            "Multiqc config file: %s\n" % self.multiqc_config,
            "requirements file: %s\n" % self.requirements,
            "version: %s\n" % self.version,
        )
        return "".join(lines)

    def __str__(self):
        return f"Rule **{self.name}**:\n{self.description}"

    def _get_version(self):
        """Return the module version, or None when it cannot be determined.

        For names of the form ``<name>/<version>`` the part after the first
        slash-separated field is returned. For pipelines the version of the
        installed ``sequana_<name>`` distribution is returned.
        """
        if "/" in self.name:
            return self.name.split("/")[1]
        if self.is_pipeline():
            # NOTE(review): pkg_resources is deprecated by setuptools; kept
            # as-is so the exception types seen by callers do not change.
            import pkg_resources

            name = self.name.replace("pipeline:", "")
            return pkg_resources.require(f"sequana_{name}")[0].version
        return None

    version = property(_get_version, doc="Get version")

    def _get_path(self):
        return self._path

    path = property(_get_path, doc="full path to the module directory")

    def _get_config(self):
        # module config file first, then the default config file one level up
        return self._first_existing(("config.yaml", "config.yml", "../config.yaml", "../config.yml"))

    config = property(_get_config, doc="full path to the config file of the module")

    def _get_schema_config(self):
        # module schema file first, then the default schema file one level up
        return self._first_existing(("schema.yaml", "schema.yml", "../schema.yaml", "../schema.yml"))

    schema_config = property(_get_schema_config, doc="full path to the schema config file of the module")

    def _get_multiqc_config(self):
        return self._get_file("multiqc_config.yaml")

    multiqc_config = property(_get_multiqc_config, doc="full path to the multiqc config file of the module")

    def _get_logo(self):
        return self._get_file("logo.png")

    logo = property(_get_logo, doc="full path to the logo of the module")

    def _get_cluster_config(self):
        return self._get_file("cluster_config.json")

    cluster_config = property(_get_cluster_config, doc="full path to the config cluster file of the module")

    def _get_readme(self):
        return self._get_file("README.rst")

    readme = property(_get_readme, doc="full path to the README file of the module")

    def _get_overview(self):
        """Return the text of the ``:Overview:`` field found in the description.

        If several lines start with ``:Overview:`` the last one wins. When
        none is found, a message asking developers to add the field is
        returned.
        """
        result = "no information. For developers: please fix the pipeline "
        result += "README.rst file by adding an :Overview: field"
        for line in self.description.split("\n"):
            if line.startswith(":Overview:"):
                # startswith() guarantees that the split has at least 2 items
                result = line.split(":Overview:")[1].strip()
        return result

    overview = property(_get_overview)

    def _get_snakefile(self):
        """Return the path to the module's Snakefile (cached once found).

        Several conventional file names are tried in order. If none exists
        and the module name has the form ``<name>/<version>``, the path
        ``<module path>/<name>.rules`` is returned without checking that it
        exists. Otherwise None is returned.
        """
        if self._snakefile:
            return self._snakefile

        short_name = self.name.replace("pipeline:", "")
        possible_snakefiles = (
            "Snakefile",
            f"Snakefile.{self.name}",
            f"{self.name}.rules",
            f"{self.name}.smk",
            f"{short_name}.rules",
            f"{short_name}.smk",
        )

        for snakefile in possible_snakefiles:
            self._snakefile = self._get_file(snakefile)
            if self._snakefile:
                return self._snakefile

        # Versioned modules only: the name is split on the first "/" so that
        # names without a slash (e.g. pipelines) or with several slashes do
        # not fail on tuple unpacking.
        base, _, version = self.name.partition("/")
        if version:
            self._snakefile = os.sep.join((self._path, f"{base}.rules"))
        return self._snakefile

    snakefile = property(_get_snakefile, doc="full path to the Snakefile file of the module")

    def _get_rules(self):
        return self._get_file("rules")

    # ``doc`` must be given by keyword: the second positional argument of
    # property() is the setter.
    rules = property(_get_rules, doc="full path to the pipeline rules")

    def _get_name(self):
        return self._name

    name = property(_get_name, doc="name of the module")

    def _get_requirements(self):
        """Return the path to ``requirements.txt`` (cached), or None if absent."""
        if self._requirements is not None:
            return self._requirements
        filename = self._get_file("requirements.txt")
        if filename:
            self._requirements = filename
        return self._requirements

    requirements = property(_get_requirements, doc="list of requirements")

    def _parse_requirements(self):
        """Split the requirements file into ``(executables, pipelines)``.

        Blank lines and lines starting with ``#`` are ignored. A leading
        ``-`` is dropped. Entries starting with ``[`` are pipeline names
        (brackets removed); every other entry is an executable or a Python
        package name.
        """
        reqlist = []
        pipelines = []
        with open(self.requirements, "r") as fh:
            for line in fh:
                entry = line.strip()
                if entry.startswith("-"):
                    entry = entry.split("-", 1)[1].strip()
                if not entry or entry.startswith("#"):
                    continue
                if entry.startswith("["):
                    pipelines.append(entry.replace("[", "").replace("]", ""))
                else:
                    reqlist.append(entry)
        return reqlist, pipelines

    def is_executable(self):
        """Is the module executable

        A Pipeline Module should have a requirements.txt file that is
        introspected to check if all executables are available;

        :return: a tuple. First element is a boolean to tell if it executable.
            Second element is the list of missing executables.
        """
        if self.requirements is None:
            return True, []

        executable = True
        missing = []

        reqlist, pipelines = self._parse_requirements()

        # Check the pipelines independently
        for pipeline in pipelines:
            Module(pipeline).check()

        for req in reqlist:
            # It is either a Python package or an executable
            try:
                easydev.shellcmd(f"which {req}")
                logger.debug(f"Found {req} executable")
            except Exception:
                # is this a Python code ?
                if len(easydev.get_dependencies(req)) == 0:
                    executable = False
                    missing.append(req)
                else:
                    logger.info(f"{req} python package")
        return executable, missing

    def check(self, mode="warning"):
        """Report the executables/packages reported missing by :meth:`is_executable`.

        :param str mode: ``"warning"`` logs the message (at critical level);
            ``"error"`` raises instead. Any other value does nothing.
        :raises ValueError: in ``"error"`` mode when something is missing.
        """
        executable, missing = self.is_executable()

        if executable is False:
            # Keep ``missing`` as a list; it is iterated item by item below.
            missing_names = " ".join(missing)
            txt = f"""Some executable or Python packages are not available: {missing_names}
Some functionalities may not work. Consider adding them with conda or set the --use-apptainer options.

            """

            if mode == "warning":
                logger.critical(txt)
            elif mode == "error":  # pragma: no cover
                txt += f"you may want to use \n conda install {missing_names};\n"
                for this in missing:
                    txt += "- %s\n" % this
                raise ValueError(txt)

    def _get_description(self):
        """Return the content of the README file, or ``"no description"``."""
        readme = self.readme
        if readme is None:
            self._description = "no description"
        else:
            with open(readme) as fh:
                self._description = fh.read()
        return self._description

    description = property(_get_description, doc=("Content of the README file associated with "))

    def md5(self):
        """return md5 of snakefile and its default configuration file

        ::

            >>> from sequana import snaketools as sm
            >>> m = sm.Module("variant_calling")
            >>> m.md5()
            {'config': 'e23b26a2ff45fa9ddb36c40670a8a00e',
             'snakefile': '7d3917743a6b123d9861ddbbb5f3baef'}

        """
        return {
            "snakefile": easydev.md5(self.snakefile),
            "config": easydev.md5(self.config),
        }
385

386

387
def _get_modules_snakefiles():
    """Yield ``(name, snakefile)`` for each known module that has a Snakefile.

    Names come from a fresh :class:`ModuleFinder`; modules whose
    ``snakefile`` attribute is empty are skipped.
    """
    finder = ModuleFinder()
    for module_name in finder.names:
        snakefile = Module(module_name).snakefile
        if not snakefile:
            continue
        yield module_name, snakefile
394

395

396
# mapping: module name -> full path to its Snakefile
modules = dict(_get_modules_snakefiles())

# names of the modules (among those above) that are pipelines
pipeline_names = [name for name in modules if Module(name).is_pipeline()]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc