cortex-lab / alyx, build 20926766534 (push via GitHub)
k1o0: "Fix tests and dump command"
12 Jan 2026 04:23PM UTC. Coverage: 86.168% (+0.6% from 85.574%); 8379 of 9724 relevant lines covered (86.17%), 0.86 hits per line.

Source file: alyx/data/models.py (94.55% covered)

import logging

from one.alf.spec import QC

from django.core.validators import RegexValidator
from django.db import models
from django.conf import settings
from django.utils import timezone
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType

from actions.models import Session
from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet, ALF_SPEC

logger = logging.getLogger(__name__)


def _related_string(field):
    return "%(app_label)s_%(class)s_" + field + "_related"


def default_timezone():
    return settings.TIME_ZONE


# Data repositories
# ------------------------------------------------------------------------------------------------

class NameManager(models.Manager):
    def get_by_natural_key(self, name):
        return self.get(name=name)


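# Usage sketch (hypothetical name): the natural key lets Django's serializers and fixtures
# reference these rows by their unique name rather than by primary key.
#     DataRepositoryType.objects.get_by_natural_key('fileserver')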


class DataRepositoryType(BaseModel):
    """
    A type of data repository, e.g. local SAMBA file server; web archive; LTO tape
    """
    objects = NameManager()

    name = models.CharField(max_length=255, unique=True)

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<DataRepositoryType '%s'>" % self.name


class DataRepository(BaseModel):
    """
    A data repository e.g. a particular local drive, specific cloud storage
    location, or a specific tape.

    Stores an absolute path to the repository root as a URI (e.g. for SMB
    file://myserver.mylab.net/Data/ALF/; for web
    https://www.neurocloud.edu/Data/). Additional information about the
    repository can be stored in JSON in a type-specific manner (e.g. which
    cardboard box to find a tape in).
    """
    objects = NameManager()

    name = models.CharField(max_length=255, unique=True)
    repository_type = models.ForeignKey(
        DataRepositoryType, null=True, blank=True, on_delete=models.CASCADE)
    hostname = models.CharField(
        max_length=200, blank=True,
        validators=[RegexValidator(r'^[a-zA-Z0-9\.\-\_]+$',
                                   message='Invalid hostname',
                                   code='invalid_hostname')],
        help_text="Host name of the network drive")
    data_url = models.URLField(
        blank=True, null=True,
        help_text="URL of the data repository, if it is accessible via HTTP")
    timezone = models.CharField(
        max_length=64, blank=True, default=default_timezone,
        help_text="Timezone of the server "
        "(see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)")
    globus_path = models.CharField(
        max_length=1000, blank=True,
        help_text="absolute path to the repository on the server, e.g. /mnt/something/")
    globus_endpoint_id = models.UUIDField(
        blank=True, null=True, help_text="UUID of the Globus endpoint")
    globus_is_personal = models.BooleanField(
        null=True, blank=True, help_text="whether the Globus endpoint is personal or not. "
        "By default, Globus cannot transfer a file between two personal endpoints.")

    def __str__(self):
        return "<DataRepository '%s'>" % self.name

    class Meta:
        verbose_name_plural = "data repositories"
        ordering = ('name',)


# Datasets
# ------------------------------------------------------------------------------------------------

class DataFormat(BaseModel):
    """
    A descriptor to accompany a Dataset or DataCollection, describing the file format in which
    the data are stored, e.g. a flat binary file, an npy-formatted array, or an mj2 movie.
    Each DataFormat records the file extension and the names of the MATLAB and Python loader
    functions.
    """

    objects = NameManager()

    name = models.CharField(
        max_length=255, unique=True,
        help_text="short identifying name, e.g. 'npy'")

    description = models.CharField(
        max_length=255, blank=True,
        help_text="Human-readable description of the file format, e.g. 'npy-formatted square "
        "numerical array'.")

    file_extension = models.CharField(
        max_length=255,
        validators=[RegexValidator(r'^\.[^\.]+$',
                                   message='Invalid file extension, should start with a dot',
                                   code='invalid_file_extension')],
        help_text="file extension, starting with a dot.")

    matlab_loader_function = models.CharField(
        max_length=255, blank=True,
        help_text="Name of the MATLAB loader function.")

    python_loader_function = models.CharField(
        max_length=255, blank=True,
        help_text="Name of the Python loader function.")

    class Meta:
        verbose_name_plural = "data formats"
        ordering = ('name',)

    def __str__(self):
        return "<DataFormat '%s'>" % self.name


class DatasetType(BaseModel):
    """
    A descriptor to accompany a Dataset or DataCollection, saying what sort of information is
    contained in it. E.g. "Neuropixels raw data, formatted as flat binary file", "eye camera
    movie as mj2", etc. Normally each DatasetType will correspond to a specific 3-part alf name
    (for individual files) or the first word of the alf names (for DataCollections).
    """

    objects = NameManager()

    name = models.CharField(
        max_length=255, unique=True, blank=True, null=False,
        help_text="Short identifying nickname, e.g. 'spikes.times'")

    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL, blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('created_by'),
        help_text="The creator of the data.")

    description = models.CharField(
        max_length=1023, blank=True,
        help_text="Human-readable description of data type. Should say what is in the file, and "
        "how to read it. For DataCollections, it should list what Datasets are expected in the "
        "collection. E.g. 'Files related to spike events, including spikes.times.npy, "
        "spikes.clusters.npy, spikes.amps.npy, spikes.depths.npy'")

    filename_pattern = CharNullField(
        max_length=255, unique=True, null=True, blank=True,
        help_text="File name pattern (with wildcards) for this file in ALF naming convention. "
        "E.g. 'spikes.times.*' or '*.timestamps.*', or 'spikes.*.*' for a DataCollection, which "
        "would include all files starting with the word 'spikes'. NB: Case-insensitive matching. "
        "If null, the name field must match the object.attribute part of the filename."
    )

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<DatasetType %s>" % self.name

    def save(self, *args, **kwargs):
        """Ensure filename_pattern is lower case."""
        if self.filename_pattern:
            self.filename_pattern = self.filename_pattern.lower()
        return super().save(*args, **kwargs)


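# Illustrative sketch (hypothetical values): save() lower-cases filename_pattern, so
# case-insensitive matching against it works as the help text describes.
#     dt = DatasetType(name='spikes.times', filename_pattern='Spikes.Times.*')
#     dt.save()   # dt.filename_pattern is now 'spikes.times.*'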


class BaseExperimentalData(BaseModel):
    """
    Abstract base class for all data acquisition models. Never used directly.

    Contains a Session link, to provide information about who did the experiment etc. Note that
    sessions can be organized hierarchically, and this can point to any level of the hierarchy.
    """
    session = models.ForeignKey(
        Session, blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('session'),
        help_text="The Session to which this data belongs")

    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL, blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('created_by'),
        help_text="The creator of the data.")

    created_datetime = models.DateTimeField(
        blank=True, null=True, default=timezone.now,
        help_text="The creation datetime.")

    generating_software = models.CharField(
        max_length=255, blank=True,
        help_text="e.g. 'ChoiceWorld 0.8.3'")

    provenance_directory = models.ForeignKey(
        'data.Dataset', blank=True, null=True,
        on_delete=models.CASCADE,
        related_name=_related_string('provenance'),
        help_text="link to directory containing intermediate results")

    class Meta:
        abstract = True


def default_dataset_type():
    return DatasetType.objects.get_or_create(name='unknown')[0].pk


def default_data_format():
    return DataFormat.objects.get_or_create(name='unknown')[0].pk


class Tag(BaseModel):
    objects = NameManager()
    name = models.CharField(max_length=255, blank=True, help_text="Long name", unique=True)
    description = models.CharField(max_length=1023, blank=True)
    protected = models.BooleanField(default=False)
    public = models.BooleanField(default=False)
    hash = models.CharField(blank=True, null=True, max_length=64,
                            help_text=("Hash of the data buffer; SHA-1 is 40 hex chars, while "
                                       "MD5 is 32 hex chars"))

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<Tag %s>" % self.name


class Revision(BaseModel):
    """
    Dataset revision information
    """
    objects = NameManager()
    name_validator = RegexValidator(f"^{ALF_SPEC['revision']}$",
                                    "Revisions must only contain letters, "
                                    "numbers, hyphens, underscores and forward slashes.")
    name = models.CharField(max_length=255, blank=True, help_text="Long name",
                            unique=True, null=False, validators=[name_validator])
    description = models.CharField(max_length=1023, blank=True)
    created_datetime = models.DateTimeField(blank=True, null=True, default=timezone.now,
                                            help_text="created date")

    class Meta:
        ordering = ('name',)

    def __str__(self):
        return "<Revision %s>" % self.name

    def save(self, *args, **kwargs):
        # Run field validators (e.g. name_validator) before saving
        self.clean_fields()
        return super(Revision, self).save(*args, **kwargs)


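# Usage sketch (hypothetical names): the save() override enforces the ALF revision spec,
# so an invalid name raises rather than being stored.
#     Revision(name='2026-01-12').save()   # OK
#     Revision(name='bad name!').save()    # raises django.core.exceptions.ValidationError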


class DatasetQuerySet(BaseQuerySet):
    """A QuerySet that checks for protected datasets before deletion."""

    def delete(self, force=False):
        if (protected := self.filter(tags__protected=True)).exists():
            if force:
                logger.warning('The following protected datasets will be deleted:\n%s',
                               '\n'.join(map(str, protected.values_list('name', 'session_id'))))
            else:
                logger.error(
                    'The following protected datasets cannot be deleted without force=True:\n%s',
                    '\n'.join(map(str, protected.values_list('name', 'session_id'))))
                raise models.ProtectedError(
                    f'Failed to delete {protected.count()} dataset(s) due to protected tags',
                    protected)
        # Propagate Django's (count, per-model dict) return value
        return super().delete()


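# Usage sketch (hypothetical queryset): bulk deletion refuses protected datasets unless forced.
#     qs = Dataset.objects.filter(session=some_session)
#     qs.delete()            # raises models.ProtectedError if any dataset has a protected tag
#     qs.delete(force=True)  # logs a warning, then deletes anyway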


class DatasetManager(BaseManager):
    def get_queryset(self):
        qs = DatasetQuerySet(self.model, using=self._db)
        qs = qs.select_related('dataset_type', 'data_format')
        return qs


@modify_fields(name={
    'blank': False,
})
class Dataset(BaseExperimentalData):
    """
    A chunk of data that is stored outside the database, most often a rectangular binary array.
    There can be multiple FileRecords for one Dataset, which will be different physical files,
    all containing identical data, with the same MD5.

    Note that by convention, binary arrays are stored as .npy and text arrays as .tsv
    """
    objects = DatasetManager()

    # Generic foreign key to arbitrary model instances allows polymorphic relationships
    content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE, null=True, blank=True)
    object_id = models.UUIDField(help_text="UUID of an object whose type matches content_type.",
                                 null=True, blank=True)
    content_object = GenericForeignKey()

    file_size = models.BigIntegerField(blank=True, null=True, help_text="Size in bytes")

    md5 = models.UUIDField(blank=True, null=True,
                           help_text="MD5 hash of the data buffer")

    hash = models.CharField(blank=True, null=False, max_length=64,
                            help_text=("Hash of the data buffer; SHA-1 is 40 hex chars, while "
                                       "MD5 is 32 hex chars"))

    # here we usually refer to version as an algorithm version such as ibllib-1.4.2
    version = models.CharField(blank=True, null=False, max_length=64,
                               help_text="version of the algorithm generating the file")

    # the collection comprises session sub-folders
    collection_validator = RegexValidator(f"^{ALF_SPEC['collection']}$",
                                          "Collections must only contain letters, "
                                          "numbers, hyphens, underscores and forward slashes.")
    collection = models.CharField(blank=True, null=False, max_length=255,
                                  help_text='file subcollection or subfolder',
                                  validators=[collection_validator])

    dataset_type = models.ForeignKey(
        DatasetType, blank=False, null=False, on_delete=models.SET_DEFAULT,
        default=default_dataset_type)

    data_format = models.ForeignKey(
        DataFormat, blank=False, null=False, on_delete=models.SET_DEFAULT,
        default=default_data_format)

    revision = models.ForeignKey(
        Revision, blank=True, null=True, on_delete=models.SET_NULL)

    tags = models.ManyToManyField('data.Tag', blank=True, related_name='datasets')

    auto_datetime = models.DateTimeField(auto_now=True, blank=True, null=True,
                                         verbose_name='last updated')

    default_dataset = models.BooleanField(default=True,
                                          help_text="Whether this dataset is the default "
                                                    "latest revision")

    QC_CHOICES = [(e.value, e.name) for e in QC]
    qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES,
                             help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES]))

    @property
    def is_online(self):
        fr = self.file_records.filter(data_repository__globus_is_personal=False)
        return bool(fr.count() and any(fr.values_list('exists', flat=True)))

    @property
    def is_protected(self):
        return bool(self.tags.filter(protected=True).count())

    @property
    def is_public(self):
        return bool(self.tags.filter(public=True).count())

    @property
    def data_url(self):
        records = self.file_records.filter(data_repository__data_url__isnull=False, exists=True)
        # preferentially return the URL of a non-personal Globus endpoint
        if records:
            order_keys = ('data_repository__globus_is_personal', '-data_repository__name')
            return records.order_by(*order_keys)[0].data_url

    def __str__(self):
        date = self.created_datetime.strftime('%d/%m/%Y at %H:%M')
        return "<Dataset %s %s '%s' by %s on %s>" % (
            str(self.pk)[:8], getattr(self.dataset_type, 'name', ''),
            self.name, self.created_by, date)

    def save(self, *args, **kwargs):
        # when a dataset is saved / created, make sure the probe insertion is set in the
        # reverse m2m
        super(Dataset, self).save(*args, **kwargs)
        if not self.collection:
            return
        self.clean_fields()  # Validate collection field
        from experiments.models import ProbeInsertion, FOV
        parts = self.collection.rsplit('/')
        if len(parts) > 1:
            name = parts[1]
            pis = ProbeInsertion.objects.filter(session=self.session, name=name)
            if len(pis):
                self.probe_insertion.set(pis.values_list('pk', flat=True))
            fovs = FOV.objects.filter(session=self.session, name=name)
            if len(fovs):
                self.field_of_view.set(fovs.values_list('pk', flat=True))

    def delete(self, *args, force=False, **kwargs):
        # If a dataset is protected and force=False, raise an exception.
        # NB: this is not called when bulk deleting or in cascading deletes.
        if self.is_protected and not force:
            tags = self.tags.filter(protected=True).values_list('name', flat=True)
            tags_str = '"' + '", "'.join(tags) + '"'
            logger.error(f'Dataset {self.name} is protected by tag(s); use force=True.')
            raise models.ProtectedError(
                f'Failed to delete dataset {self.name} due to protected tag(s) {tags_str}', self)
        super().delete(*args, **kwargs)


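# Usage sketch (hypothetical session object): saving a dataset whose collection names a
# probe, e.g. 'alf/probe00', links it to the matching ProbeInsertion via the reverse m2m.
#     ds = Dataset.objects.create(session=session, name='spikes.times.npy',
#                                 collection='alf/probe00')
#     ds.probe_insertion.all()   # now contains the session's 'probe00' insertion, if any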


# Files
# ------------------------------------------------------------------------------------------------
class FileRecordManager(models.Manager):
    def get_queryset(self):
        qs = super(FileRecordManager, self).get_queryset()
        qs = qs.select_related('data_repository')
        return qs


class FileRecord(BaseModel):
    """
    A single file on disk or tape. Normally specified by a path within an archive. If required,
    more details can be in the JSON
    """

    objects = FileRecordManager()

    dataset = models.ForeignKey(Dataset, related_name='file_records', on_delete=models.CASCADE)

    data_repository = models.ForeignKey(
        'DataRepository', on_delete=models.CASCADE)

    relative_path = models.CharField(
        max_length=1000,
        validators=[RegexValidator(r'^[a-zA-Z0-9\_][^\\\:]+$',
                                   message='Invalid path',
                                   code='invalid_path')],
        help_text="path name within repository")

    exists = models.BooleanField(
        default=False, help_text="Whether the file exists in the data repository")

    class Meta:
        unique_together = (('data_repository', 'relative_path'),)

    @property
    def data_url(self):
        root = self.data_repository.data_url
        if not root:
            return None
        from one.alf.path import add_uuid_string
        return root + add_uuid_string(self.relative_path, self.dataset.pk).as_posix()

    def save(self, *args, **kwargs):
        """Save the file record, also saving the parent dataset to trigger its auto-date."""
        super(FileRecord, self).save(*args, **kwargs)
        # Save the dataset as well to make sure its auto datetime is updated whenever an
        # associated file record is saved
        self.dataset.save()

    def __str__(self):
        return "<FileRecord '%s' by %s>" % (self.relative_path, self.dataset.created_by)


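# Illustrative sketch (hypothetical values): data_url joins the repository root with the
# relative path, with the dataset UUID spliced into the file stem by one.alf.path.add_uuid_string.
#     repo.data_url    == 'https://www.neurocloud.edu/Data/'
#     fr.relative_path == 'subject/2026-01-12/001/alf/spikes.times.npy'
#     fr.data_url      == 'https://www.neurocloud.edu/Data/subject/2026-01-12/001/alf/'
#                         'spikes.times.<dataset-uuid>.npy'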


# Download table
# ------------------------------------------------------------------------------------------------

class Download(BaseModel):
    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE)
    first_download = models.DateTimeField(auto_now_add=True)
    last_download = models.DateTimeField(auto_now=True)
    count = models.IntegerField(default=0)
    projects = models.ManyToManyField('subjects.Project', blank=True)

    class Meta:
        unique_together = (('user', 'dataset'),)

    def increment(self):
        self.count += 1
        self.save()

    def __str__(self):
        return '<Download of %s dataset by %s (%d)>' % (
            self.dataset.dataset_type.name, self.user.username, self.count)


def new_download(dataset, user, projects=()):
    d, _ = Download.objects.get_or_create(user=user, dataset=dataset)
    d.projects.add(*projects)
    d.increment()
    return d
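

# Usage sketch (hypothetical dataset/user objects): records that a user downloaded a dataset,
# creating the row on first download and bumping the counter on each subsequent call.
#     d = new_download(dataset, user, projects=[project])
#     d.count            # 1 on the first call, 2 on the next, ...
#     d.first_download   # set once; last_download updates on every save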