• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / miranda / 2266807674

pending completion
2266807674

Pull #33

github

GitHub
Merge 58a4f4d99 into dad775e9d
Pull Request #33: Support CORDEX and CMIP5/6

34 of 242 new or added lines in 16 files covered. (14.05%)

10 existing lines in 7 files now uncovered.

661 of 3268 relevant lines covered (20.23%)

0.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

20.0
/miranda/archive/groupings.py
1
import logging
3✔
2
import re
3✔
3
from logging.config import dictConfig
3✔
4
from pathlib import Path
3✔
5
from types import GeneratorType
3✔
6
from typing import Dict, List, Union
3✔
7

8
from miranda.scripting import LOGGING_CONFIG
3✔
9
from miranda.storage import report_file_size
3✔
10

11
# Apply the package-wide logging configuration as soon as the module loads.
dictConfig(LOGGING_CONFIG)
# Type aliases shared by the grouping helpers below.
Nested_List = List[List[Path]]
PathDict = Dict[str, List[Path]]


# One gibibyte in bytes; the default unit for size-based grouping.
GiB = int(pow(2, 30))

# Public API of this module.
__all__ = [
    "group_by_deciphered_date",
    "group_by_length",
    "group_by_size",
    "group_by_subdirectories",
]
24

25

26
def group_by_length(
    files: Union[GeneratorType, List[Union[str, Path]]], size: int = 10
) -> List[List[Path]]:
    """Group files into lists of at most `size` entries.

    Parameters
    ----------
    files : GeneratorType or List[Union[str, Path]]
        File paths (or strings coercible to paths); sorted before grouping
        so the result is deterministic regardless of input order.
    size : int
        Maximum number of files per group (default: 10).

    Returns
    -------
    List[List[Path]]
        Consecutive groups of sorted paths; only the final group may hold
        fewer than `size` entries. Empty input yields an empty list.
    """
    logging.info(f"Creating groups of {size} files")
    sorted_files = sorted(Path(f) for f in files)
    # Slicing covers the trailing partial group naturally, replacing the
    # original modulo counter and the `if not group: pass / else` branch.
    grouped_list = [
        sorted_files[i : i + size] for i in range(0, len(sorted_files), size)
    ]
    logging.info(f"Divided files into {len(grouped_list)} groups.")
    return grouped_list
57

58

59
def group_by_deciphered_date(
    files: Union[GeneratorType, List[Union[str, Path]]]
) -> Dict[str, List[Path]]:
    """Find a common date and group files based on year and month.

    Parameters
    ----------
    files : GeneratorType or List[Union[str, Path]]
        Paths to ``.nc``/``.zarr`` files whose names embed a
        ``YYYY[-]MM[[-]DD]`` date.

    Returns
    -------
    Dict[str, List[Path]]
        If every file carries a decipherable date: keys are ``"YYYY-MM"``
        (names with a day component) or ``"YYYY"`` (names without one),
        each mapped to its sorted files. Otherwise a single ``"data"`` key
        mapping to all files, signalling that grouping was aborted.
    """
    logging.warning("This function doesn't work well with multi-thread processing!")
    logging.info("Creating files from deciphered dates.")

    year_month_day = re.compile(
        r"(?P<year>\d{4})-?(?P<month>\d{2})-?(?P<day>\d{2})?.*\.(?P<suffix>nc|zarr)$"
    )

    files = [Path(f) for f in files]
    files.sort()
    dates = dict()
    total = 0
    for f in files:
        match = year_month_day.search(f.name)
        # Bug fix: a filename with no decipherable date previously raised
        # AttributeError (`match` was None); skip it so the
        # `total != len(files)` fallback below reports the failure instead.
        if match is None:
            continue
        if match.group("day"):
            # Full date available: bucket by year and month.
            key = "-".join([match.group("year"), match.group("month")])
        else:
            # The month group is mandatory in the pattern, so any match
            # without a day still has year+month; bucket by year only
            # (mirrors the original `elif match.group("month")` branch).
            key = match.group("year")
        dates.setdefault(key, list()).append(f)
        total += 1

    if dates and total == len(files):
        logging.info(
            f"All files have been grouped by date. {len(dates)} groups created."
        )
        return dict(dates)

    if dates and total != len(files):
        logging.info(
            "Not all files were successfully grouped by date. Grouping aborted."
        )
    else:
        logging.info("No matches for dates found. Grouping aborted.")
    return dict(data=files)
109

110

111
def group_by_size(
    files: Union[GeneratorType, List[Union[str, Path]]], size: int = 10 * GiB
) -> List[List[Path]]:
    """Group files by cumulative on-disk size.

    Parameters
    ----------
    files : GeneratorType or List[Union[str, Path]]
        Paths of existing files; each file's ``st_size`` is consulted.
    size : int
        Size threshold in bytes (default: 10 GiB). A group is closed as
        soon as its running total exceeds this value, so a group may end
        up larger than `size` by up to one file.

    Returns
    -------
    List[List[Path]]
        Groups of sorted paths; the final group holds the remainder.
    """

    logging.info(
        f"Creating groups of files based on size not exceeding: {report_file_size(size)}."
    )

    sorted_paths = sorted(Path(f) for f in files)
    grouped_list = []
    current_group = []
    running_total = 0
    for path in sorted_paths:
        running_total += path.stat().st_size
        current_group.append(path)
        if running_total > size:
            # Threshold crossed: close this group and start a fresh one.
            grouped_list.append(current_group)
            current_group = []
            running_total = 0

    if current_group:
        grouped_list.append(current_group)
    else:
        logging.info("The final group is empty. Skipping this set...")
    return grouped_list
148

149

150
def group_by_subdirectories(
    files: Union[GeneratorType, List[Union[str, Path]]],
    within: Union[str, Path] = None,
) -> Dict[str, List[Path]]:
    """Group files based on the parent folder they are located within.

    Parameters
    ----------
    files : GeneratorType or List[Union[str, Path]]
        File paths expressed relative to (or beneath) `within`.
    within : Union[str, Path], optional
        Root folder against which each file's relative parent is computed.
        Defaults to the current working directory.

    Returns
    -------
    Dict[str, List[Path]]
        Mapping of relative parent directory to the sorted files inside it.
        NOTE(review): the keys are actually ``Path`` objects, not ``str``,
        despite the declared type — confirm with callers before changing.
    """
    # Fix: the original annotation `str or Path` evaluates to just `str`
    # at runtime; Union expresses the intended either-type contract.
    if not within:
        within = Path.cwd()

    files = [Path(f) for f in files]
    files.sort()
    groups = dict()
    for f in files:
        group_name = Path(f).relative_to(within).parent
        groups.setdefault(group_name, list()).append(f)

    logging.info(
        f"File subdirectories found. Proceeding with: `{', '.join([str(key) for key in groups.keys()])}`."
    )
    return groups
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2024 Coveralls, Inc