cortex-lab / alyx, build 20926766534 (push via GitHub)
k1o0: "Fix tests and dump command"
12 Jan 2026 04:23PM UTC. Coverage: 86.168% (+0.6% from 85.574%); 8379 of 9724 relevant lines covered (86.17%), 0.86 hits per line.

Source file: alyx/data/models.py (94.55% covered)

import logging

from one.alf.spec import QC

from django.core.validators import RegexValidator
from django.db import models
from django.conf import settings
from django.utils import timezone
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType

from actions.models import Session
from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet, ALF_SPEC

logger = logging.getLogger(__name__)


def _related_string(field):
    return "%(app_label)s_%(class)s_" + field + "_related"


def default_timezone():
    return settings.TIME_ZONE


# Data repositories
# ------------------------------------------------------------------------------------------------

class NameManager(models.Manager):
    def get_by_natural_key(self, name):
        return self.get(name=name)


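# Usage sketch (hypothetical name): the natural key lets Django's serializers and fixtures
# reference these rows by their unique name rather than by primary key.
#     DataRepositoryType.objects.get_by_natural_key('fileserver')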


class DataRepositoryType(BaseModel):
    """
    A type of data repository, e.g. local SAMBA file server; web archive; LTO tape
    """
    objects = NameManager()

    name = models.CharField(max_length=255, unique=True)

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<DataRepositoryType '%s'>" % self.name


class DataRepository(BaseModel):
    """
    A data repository e.g. a particular local drive, specific cloud storage
    location, or a specific tape.

    Stores an absolute path to the repository root as a URI (e.g. for SMB
    file://myserver.mylab.net/Data/ALF/; for web
    https://www.neurocloud.edu/Data/). Additional information about the
    repository can be stored in JSON in a type-specific manner (e.g. which
    cardboard box to find a tape in).
    """
    objects = NameManager()

    name = models.CharField(max_length=255, unique=True)
    repository_type = models.ForeignKey(
        DataRepositoryType, null=True, blank=True, on_delete=models.CASCADE)
    hostname = models.CharField(
        max_length=200, blank=True,
        validators=[RegexValidator(r'^[a-zA-Z0-9\.\-\_]+$',
                                   message='Invalid hostname',
                                   code='invalid_hostname')],
        help_text="Host name of the network drive")
    data_url = models.URLField(
        blank=True, null=True,
        help_text="URL of the data repository, if it is accessible via HTTP")
    timezone = models.CharField(
        max_length=64, blank=True, default=default_timezone,
        help_text="Timezone of the server "
        "(see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)")
    globus_path = models.CharField(
        max_length=1000, blank=True,
        help_text="absolute path to the repository on the server, e.g. /mnt/something/")
    globus_endpoint_id = models.UUIDField(
        blank=True, null=True, help_text="UUID of the Globus endpoint")
    globus_is_personal = models.BooleanField(
        null=True, blank=True, help_text="whether the Globus endpoint is personal or not. "
        "By default, Globus cannot transfer a file between two personal endpoints.")

    def __str__(self):
        return "<DataRepository '%s'>" % self.name

    class Meta:
        verbose_name_plural = "data repositories"
        ordering = ('name',)


# Datasets
# ------------------------------------------------------------------------------------------------

class DataFormat(BaseModel):
    """
    A descriptor to accompany a Dataset or DataCollection, describing the file format in which
    the data are stored, e.g. a flat binary file, an npy-formatted array, or an mj2 movie.
    Each DataFormat records the file extension and the names of the MATLAB and Python loader
    functions.
    """

    objects = NameManager()

    name = models.CharField(
        max_length=255, unique=True,
        help_text="short identifying name, e.g. 'npy'")

    description = models.CharField(
        max_length=255, blank=True,
        help_text="Human-readable description of the file format, e.g. 'npy-formatted square "
        "numerical array'.")

    file_extension = models.CharField(
        max_length=255,
        validators=[RegexValidator(r'^\.[^\.]+$',
                                   message='Invalid file extension, should start with a dot',
                                   code='invalid_file_extension')],
        help_text="file extension, starting with a dot.")

    matlab_loader_function = models.CharField(
        max_length=255, blank=True,
        help_text="Name of the MATLAB loader function.")

    python_loader_function = models.CharField(
        max_length=255, blank=True,
        help_text="Name of the Python loader function.")

    class Meta:
        verbose_name_plural = "data formats"
        ordering = ('name',)

    def __str__(self):
        return "<DataFormat '%s'>" % self.name


class DatasetType(BaseModel):
    """
    A descriptor to accompany a Dataset or DataCollection, saying what sort of information is
    contained in it. E.g. "Neuropixels raw data, formatted as flat binary file", "eye camera
    movie as mj2", etc. Normally each DatasetType will correspond to a specific 3-part alf name
    (for individual files) or the first word of the alf names (for DataCollections).
    """

    objects = NameManager()

    name = models.CharField(
        max_length=255, unique=True, blank=True, null=False,
        help_text="Short identifying nickname, e.g. 'spikes.times'")

    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL, blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('created_by'),
        help_text="The creator of the data.")

    description = models.CharField(
        max_length=1023, blank=True,
        help_text="Human-readable description of data type. Should say what is in the file, and "
        "how to read it. For DataCollections, it should list what Datasets are expected in the "
        "collection. E.g. 'Files related to spike events, including spikes.times.npy, "
        "spikes.clusters.npy, spikes.amps.npy, spikes.depths.npy'")

    filename_pattern = CharNullField(
        max_length=255, unique=True, null=True, blank=True,
        help_text="File name pattern (with wildcards) for this file in ALF naming convention. "
        "E.g. 'spikes.times.*' or '*.timestamps.*', or 'spikes.*.*' for a DataCollection, which "
        "would include all files starting with the word 'spikes'. NB: Case-insensitive matching. "
        "If null, the name field must match the object.attribute part of the filename."
    )

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<DatasetType %s>" % self.name

    def save(self, *args, **kwargs):
        """Ensure filename_pattern is lower case."""
        if self.filename_pattern:
            self.filename_pattern = self.filename_pattern.lower()
        return super().save(*args, **kwargs)


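# Illustrative sketch (hypothetical values): save() lower-cases filename_pattern, so
# case-insensitive matching against it works as the help text describes.
#     dt = DatasetType(name='spikes.times', filename_pattern='Spikes.Times.*')
#     dt.save()   # dt.filename_pattern is now 'spikes.times.*'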


class BaseExperimentalData(BaseModel):
    """
    Abstract base class for all data acquisition models. Never used directly.

    Contains a Session link, to provide information about who did the experiment etc. Note that
    sessions can be organized hierarchically, and this can point to any level of the hierarchy.
    """
    session = models.ForeignKey(
        Session, blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('session'),
        help_text="The Session to which this data belongs")

    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL, blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('created_by'),
        help_text="The creator of the data.")

    created_datetime = models.DateTimeField(
        blank=True, null=True, default=timezone.now,
        help_text="The creation datetime.")

    generating_software = models.CharField(
        max_length=255, blank=True,
        help_text="e.g. 'ChoiceWorld 0.8.3'")

    provenance_directory = models.ForeignKey(
        'data.Dataset', blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('provenance'),
        help_text="link to directory containing intermediate results")

    class Meta:
        abstract = True


def default_dataset_type():
    return DatasetType.objects.get_or_create(name='unknown')[0].pk


def default_data_format():
    return DataFormat.objects.get_or_create(name='unknown')[0].pk


class Tag(BaseModel):
    objects = NameManager()
    name = models.CharField(max_length=255, blank=True, help_text="Long name", unique=True)
    description = models.CharField(max_length=1023, blank=True)
    protected = models.BooleanField(default=False)
    public = models.BooleanField(default=False)
    hash = models.CharField(blank=True, null=True, max_length=64,
                            help_text=("Hash of the data buffer; SHA-1 is 40 hex chars, while "
                                       "MD5 is 32 hex chars"))

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<Tag %s>" % self.name


class Revision(BaseModel):
    """
    Dataset revision information
    """
    objects = NameManager()
    name_validator = RegexValidator(f"^{ALF_SPEC['revision']}$",
                                    "Revisions must only contain letters, "
                                    "numbers, hyphens, underscores and forward slashes.")
    name = models.CharField(max_length=255, blank=True, help_text="Long name",
                            unique=True, null=False, validators=[name_validator])
    description = models.CharField(max_length=1023, blank=True)
    created_datetime = models.DateTimeField(blank=True, null=True, default=timezone.now,
                                            help_text="created date")

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<Revision %s>" % self.name

    def save(self, *args, **kwargs):
        # Run field validators (e.g. name_validator) before saving
        self.clean_fields()
        return super(Revision, self).save(*args, **kwargs)


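# Usage sketch (hypothetical names): the save() override enforces the ALF revision spec,
# so an invalid name raises rather than being stored.
#     Revision(name='2026-01-12').save()   # OK
#     Revision(name='bad name!').save()    # raises django.core.exceptions.ValidationError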


class DatasetQuerySet(BaseQuerySet):
    """A QuerySet that checks for protected datasets before deletion."""

    def delete(self, force=False):
        if (protected := self.filter(tags__protected=True)).exists():
            if force:
                logger.warning('The following protected datasets will be deleted:\n%s',
                               '\n'.join(map(str, protected.values_list('name', 'session_id'))))
            else:
                logger.error(
                    'The following protected datasets cannot be deleted without force=True:\n%s',
                    '\n'.join(map(str, protected.values_list('name', 'session_id'))))
                raise models.ProtectedError(
                    f'Failed to delete {protected.count()} dataset(s) due to protected tags',
                    protected)
        # Propagate Django's (count, per-model dict) return value
        return super().delete()


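# Usage sketch (hypothetical queryset): bulk deletion refuses protected datasets unless forced.
#     qs = Dataset.objects.filter(session=some_session)
#     qs.delete()            # raises models.ProtectedError if any dataset has a protected tag
#     qs.delete(force=True)  # logs a warning, then deletes anyway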


class DatasetManager(BaseManager):
    def get_queryset(self):
        qs = DatasetQuerySet(self.model, using=self._db)
        qs = qs.select_related('dataset_type', 'data_format')
        return qs


@modify_fields(name={
    'blank': False,
})
class Dataset(BaseExperimentalData):
    """
    A chunk of data that is stored outside the database, most often a rectangular binary array.
    There can be multiple FileRecords for one Dataset, which will be different physical files,
    all containing identical data, with the same MD5.

    Note that by convention, binary arrays are stored as .npy and text arrays as .tsv
    """
    objects = DatasetManager()

    # Generic foreign key to arbitrary model instances allows polymorphic relationships
    content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE, null=True, blank=True)
    object_id = models.UUIDField(help_text="UUID of an object whose type matches content_type.",
                                 null=True, blank=True)
    content_object = GenericForeignKey()

    file_size = models.BigIntegerField(blank=True, null=True, help_text="Size in bytes")

    md5 = models.UUIDField(blank=True, null=True,
                           help_text="MD5 hash of the data buffer")

    hash = models.CharField(blank=True, null=False, max_length=64,
                            help_text=("Hash of the data buffer; SHA-1 is 40 hex chars, while "
                                       "MD5 is 32 hex chars"))

    # here we usually refer to version as an algorithm version such as ibllib-1.4.2
    version = models.CharField(blank=True, null=False, max_length=64,
                               help_text="version of the algorithm generating the file")

    # the collection comprises session sub-folders
    collection_validator = RegexValidator(f"^{ALF_SPEC['collection']}$",
                                          "Collections must only contain letters, "
                                          "numbers, hyphens, underscores and forward slashes.")
    collection = models.CharField(blank=True, null=False, max_length=255,
                                  help_text='file subcollection or subfolder',
                                  validators=[collection_validator])

    dataset_type = models.ForeignKey(
        DatasetType, blank=False, null=False, on_delete=models.SET_DEFAULT,
        default=default_dataset_type)

    data_format = models.ForeignKey(
        DataFormat, blank=False, null=False, on_delete=models.SET_DEFAULT,
        default=default_data_format)

    revision = models.ForeignKey(
        Revision, blank=True, null=True, on_delete=models.SET_NULL)

    tags = models.ManyToManyField('data.Tag', blank=True, related_name='datasets')

    auto_datetime = models.DateTimeField(auto_now=True, blank=True, null=True,
                                         verbose_name='last updated')

    default_dataset = models.BooleanField(default=True,
                                          help_text="Whether this dataset is the default "
                                                    "latest revision")

    QC_CHOICES = [(e.value, e.name) for e in QC]
    qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES,
                             help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES]))

    @property
    def is_online(self):
        fr = self.file_records.filter(data_repository__globus_is_personal=False)
        return bool(fr.count() and any(fr.values_list('exists', flat=True)))

    @property
    def is_protected(self):
        return bool(self.tags.filter(protected=True).count())

    @property
    def is_public(self):
        return bool(self.tags.filter(public=True).count())

    @property
    def data_url(self):
        records = self.file_records.filter(data_repository__data_url__isnull=False, exists=True)
        # preferentially return the URL of a non-personal Globus endpoint
        if records:
            order_keys = ('data_repository__globus_is_personal', '-data_repository__name')
            return records.order_by(*order_keys)[0].data_url

    def __str__(self):
        date = self.created_datetime.strftime('%d/%m/%Y at %H:%M')
        return "<Dataset %s %s '%s' by %s on %s>" % (
            str(self.pk)[:8], getattr(self.dataset_type, 'name', ''),
            self.name, self.created_by, date)

    def save(self, *args, **kwargs):
        # when a dataset is saved / created, make sure the probe insertion is set in the
        # reverse m2m
        super(Dataset, self).save(*args, **kwargs)
        if not self.collection:
            return
        self.clean_fields()  # Validate collection field
        from experiments.models import ProbeInsertion, FOV
        parts = self.collection.rsplit('/')
        if len(parts) > 1:
            name = parts[1]
            pis = ProbeInsertion.objects.filter(session=self.session, name=name)
            if len(pis):
                self.probe_insertion.set(pis.values_list('pk', flat=True))
            fovs = FOV.objects.filter(session=self.session, name=name)
            if len(fovs):
                self.field_of_view.set(fovs.values_list('pk', flat=True))

    def delete(self, *args, force=False, **kwargs):
        # If a dataset is protected and force=False, raise an exception.
        # NB: this is not called when bulk deleting or in cascading deletes.
        if self.is_protected and not force:
            tags = self.tags.filter(protected=True).values_list('name', flat=True)
            tags_str = '"' + '", "'.join(tags) + '"'
            logger.error(f'Dataset {self.name} is protected by tag(s); use force=True.')
            raise models.ProtectedError(
                f'Failed to delete dataset {self.name} due to protected tag(s) {tags_str}', self)
        super().delete(*args, **kwargs)


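# Usage sketch (hypothetical session object): saving a dataset whose collection names a
# probe, e.g. 'alf/probe00', links it to the matching ProbeInsertion via the reverse m2m.
#     ds = Dataset.objects.create(session=session, name='spikes.times.npy',
#                                 collection='alf/probe00')
#     ds.probe_insertion.all()   # now contains the session's 'probe00' insertion, if any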


# Files
# ------------------------------------------------------------------------------------------------
class FileRecordManager(models.Manager):
    def get_queryset(self):
        qs = super(FileRecordManager, self).get_queryset()
        qs = qs.select_related('data_repository')
        return qs


class FileRecord(BaseModel):
    """
    A single file on disk or tape. Normally specified by a path within an archive. If required,
    more details can be in the JSON
    """

    objects = FileRecordManager()

    dataset = models.ForeignKey(Dataset, related_name='file_records', on_delete=models.CASCADE)

    data_repository = models.ForeignKey(
        'DataRepository', on_delete=models.CASCADE)

    relative_path = models.CharField(
        max_length=1000,
        validators=[RegexValidator(r'^[a-zA-Z0-9\_][^\\\:]+$',
                                   message='Invalid path',
                                   code='invalid_path')],
        help_text="path name within repository")

    exists = models.BooleanField(
        default=False, help_text="Whether the file exists in the data repository")

    class Meta:
        unique_together = (('data_repository', 'relative_path'),)

    @property
    def data_url(self):
        root = self.data_repository.data_url
        if not root:
            return None
        from one.alf.path import add_uuid_string
        return root + add_uuid_string(self.relative_path, self.dataset.pk).as_posix()

    def save(self, *args, **kwargs):
        """Save the file record, also saving the parent dataset to trigger its auto-date."""
        super(FileRecord, self).save(*args, **kwargs)
        # Save the dataset as well to make sure its auto datetime is updated whenever an
        # associated file record is saved
        self.dataset.save()

    def __str__(self):
        return "<FileRecord '%s' by %s>" % (self.relative_path, self.dataset.created_by)


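# Illustrative sketch (hypothetical values): data_url joins the repository root with the
# relative path, with the dataset UUID spliced into the file stem by one.alf.path.add_uuid_string.
#     repo.data_url    == 'https://www.neurocloud.edu/Data/'
#     fr.relative_path == 'subject/2026-01-12/001/alf/spikes.times.npy'
#     fr.data_url      == 'https://www.neurocloud.edu/Data/subject/2026-01-12/001/alf/'
#                         'spikes.times.<dataset-uuid>.npy'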


# Download table
# ------------------------------------------------------------------------------------------------

class Download(BaseModel):
    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE)
    first_download = models.DateTimeField(auto_now_add=True)
    last_download = models.DateTimeField(auto_now=True)
    count = models.IntegerField(default=0)
    projects = models.ManyToManyField('subjects.Project', blank=True)

    class Meta:
        unique_together = (('user', 'dataset'),)

    def increment(self):
        self.count += 1
        self.save()

    def __str__(self):
        return '<Download of %s dataset by %s (%d)>' % (
            self.dataset.dataset_type.name, self.user.username, self.count)


def new_download(dataset, user, projects=()):
    d, _ = Download.objects.get_or_create(user=user, dataset=dataset)
    d.projects.add(*projects)
    d.increment()
    return d
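

# Usage sketch (hypothetical dataset/user objects): records that a user downloaded a dataset,
# creating the row on first download and bumping the counter on each subsequent call.
#     d = new_download(dataset, user, projects=[project])
#     d.count            # 1 on the first call, 2 on the next, ...
#     d.first_download   # set once; last_download updates on every save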