
Ouranosinc / miranda, build 2117113978 (pending completion)
GitHub Pull Request #24: Add CMIP file structure, use pyessv controlled vocabularies, and major refactoring
Merge 22a6d4cb6 into bf78f91b7

234 of 1077 new or added lines in 35 files covered (21.73%).
13 existing lines in 4 files are now uncovered.
728 of 3221 relevant lines covered (22.6%); 0.68 hits per line.

Source File: /miranda/archive/groupings.py (19.32% of relevant lines covered)
In this build, the module-level statements and the function signatures are hit by the test suite; the function bodies are uncovered.
import logging
import re
from logging.config import dictConfig
from pathlib import Path
from types import GeneratorType
from typing import Dict, List, Union

from miranda.scripting import LOGGING_CONFIG
from miranda.storage import report_file_size

dictConfig(LOGGING_CONFIG)
Nested_List = List[List[Path]]
PathDict = Dict[str, List[Path]]


GiB = int(pow(2, 30))

__all__ = [
    "group_by_deciphered_date",
    "group_by_length",
    "group_by_size",
    "group_by_subdirectories",
]


def group_by_length(
    files: Union[GeneratorType, List[Union[str, Path]]], size: int = 10
) -> List[List[Path]]:
    """Group files into lists of at most `size` entries."""
    logging.info(f"Creating groups of {size} files")

    files = [Path(f) for f in files]
    files.sort()
    grouped_list = list()
    group = list()
    for i, f in enumerate(files):
        group.append(f)
        if (i + 1) % size == 0:
            # A full group has been collected; start a new one.
            grouped_list.append(group.copy())
            group.clear()
    # Keep any remaining files as a final, smaller group.
    if group:
        grouped_list.append(group.copy())
    logging.info("Divided files into %s groups." % len(grouped_list))
    return grouped_list
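
As an aside, a minimal usage sketch for group_by_length (not part of groupings.py): the file names below are invented, and the miranda package is assumed to be importable. The function only sorts and counts entries, so the paths do not need to exist on disk.

from miranda.archive.groupings import group_by_length

# Seven hypothetical file names, grouped three at a time.
sample = [f"tas_day_{year}.nc" for year in range(2000, 2007)]
chunks = group_by_length(sample, size=3)
# Expected: three groups of sizes 3, 3, and 1, each containing Path objects.
for chunk in chunks:
    print([p.name for p in chunk])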


def group_by_deciphered_date(
    files: Union[GeneratorType, List[Union[str, Path]]]
) -> Dict[str, List[Path]]:
    """Attempt to decipher a date from each file name and group files by year and month."""
    logging.warning("This function doesn't work well with multi-thread processing!")
    logging.info("Creating groups from deciphered dates.")

    year_month_day = re.compile(
        r"(?P<year>[0-9]{4})-?(?P<month>[0-9]{2})-?(?P<day>[0-9]{2})?.*\.(?P<suffix>nc)$"
    )

    files = [Path(f) for f in files]
    files.sort()
    dates = dict()
    total = 0
    for f in files:
        match = re.search(year_month_day, str(Path(f).name))
        if match is None:
            # File name carries no recognizable date; leave it ungrouped.
            continue
        if match.group("day"):
            # Daily files are grouped by year and month.
            key = "-".join([match.group("year"), match.group("month")])
            dates.setdefault(key, list()).append(Path(f))
            total += 1
        elif match.group("month"):
            # Monthly files are grouped by year.
            key = match.group("year")
            dates.setdefault(key, list()).append(Path(f))
            total += 1

    if dates and total == len(files):
        logging.info(
            f"All files have been grouped by date. {len(dates)} groups created."
        )
        return dict(dates)

    if dates and total != len(files):
        logging.info(
            "Not all files were successfully grouped by date. Grouping aborted."
        )
    else:
        logging.info("No matches for dates found. Grouping aborted.")
    return dict(data=files)
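
A sketch of how group_by_deciphered_date keys its output (not part of groupings.py): the NetCDF file names below are invented but follow the year/month/day pattern matched by the regular expression above.

from miranda.archive.groupings import group_by_deciphered_date

# Daily files (YYYYMMDD) are keyed by "year-month".
daily = ["pr_19900101.nc", "pr_19900102.nc", "pr_19900201.nc"]
print(group_by_deciphered_date(daily))
# Expected keys: "1990-01" (two files) and "1990-02" (one file).

# With the None guard above, a name without a decipherable date is skipped;
# since not every file could be grouped, the sorted input is returned under
# the single key "data" instead.
mixed = daily + ["station_list.txt"]
print(group_by_deciphered_date(mixed))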


def group_by_size(
    files: Union[GeneratorType, List[Union[str, Path]]], size: int = 10 * GiB
) -> List[List[Path]]:
    """Group files into lists, closing each group once its cumulative size exceeds the target size."""
    logging.info(
        "Creating groups of files based on size not exceeding {}.".format(
            report_file_size(size)
        )
    )

    files = [Path(f) for f in files]
    files.sort()
    grouped_list = list()
    group = list()
    total = 0
    for f in files:
        total += f.stat().st_size
        group.append(f)
        if total > size:
            # The target size has been exceeded; close this group and start a new one.
            grouped_list.append(group.copy())
            group.clear()
            total = 0

    if not group:
        logging.info("The final group is empty. Skipping this set...")
    else:
        grouped_list.append(group.copy())
    return grouped_list
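
An illustrative sketch for group_by_size (not part of groupings.py): because the function reads file sizes from disk, the example writes small throwaway files in a temporary directory; names and sizes are arbitrary.

import tempfile
from pathlib import Path

from miranda.archive.groupings import group_by_size

with tempfile.TemporaryDirectory() as tmp:
    # Three 1 MiB dummy files; with a 1.5 MiB target, a group is closed as
    # soon as its cumulative size exceeds the target.
    for name in ("a.nc", "b.nc", "c.nc"):
        (Path(tmp) / name).write_bytes(b"\0" * 2**20)
    groups = group_by_size(Path(tmp).glob("*.nc"), size=3 * 2**19)
    print([[p.name for p in g] for g in groups])
    # Expected: [['a.nc', 'b.nc'], ['c.nc']]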


def group_by_subdirectories(
    files: Union[GeneratorType, List[Union[str, Path]]], within: Union[str, Path] = None
) -> Dict[str, List[Path]]:
    """Group files by the parent folder they are located within, relative to `within`."""
    if not within:
        within = Path.cwd()

    files = [Path(f) for f in files]
    files.sort()
    groups = dict()
    for f in files:
        # The group key is the file's subdirectory path relative to `within`.
        group_name = Path(f).relative_to(within).parent
        groups.setdefault(group_name, list()).append(f)

    logging.info(
        "File subdirectories found. Proceeding with {}.".format(
            str([str(key) for key in groups.keys()])
        )
    )
    return groups
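
A sketch for group_by_subdirectories (not part of groupings.py), using invented paths under a hypothetical archive root; relative_to() is pure path arithmetic, so nothing needs to exist on disk.

from pathlib import Path

from miranda.archive.groupings import group_by_subdirectories

root = Path("/data/archive")  # hypothetical archive root
paths = [
    root / "cmip5" / "tas" / "tas_19900101.nc",
    root / "cmip5" / "tas" / "tas_19900102.nc",
    root / "cmip6" / "pr" / "pr_20000101.nc",
]
groups = group_by_subdirectories(paths, within=root)
print({str(k): [p.name for p in v] for k, v in groups.items()})
# Expected keys on POSIX systems: "cmip5/tas" and "cmip6/pr", one file list each.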