• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MITLibraries / transmogrifier / 12319654873

13 Dec 2024 04:48PM UTC coverage: 98.751% (+2.3%) from 96.404%
12319654873

Pull #219

github

ghukill
Update --input-file CLI docstring
Pull Request #219: TIMX 405 - support output to TIMDEX parquet dataset

37 of 45 new or added lines in 2 files covered. (82.22%)

1 existing line in 1 file now uncovered.

1739 of 1761 relevant lines covered (98.75%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.89
/transmogrifier/cli.py
1
import logging
1✔
2
from datetime import timedelta
1✔
3
from time import perf_counter
1✔
4

5
import click
1✔
6

7
from transmogrifier.config import (
1✔
8
    SOURCES,
9
    configure_logger,
10
    configure_sentry,
11
    get_etl_version,
12
)
13
from transmogrifier.sources.transformer import Transformer
1✔
14

15
logger = logging.getLogger(__name__)
1✔
16

17

18
@click.command()
1✔
19
@click.option(
1✔
20
    "-i",
21
    "--input-file",
22
    required=True,
23
    help="Filepath of input records to transform.  The filename must be in the format "
24
    "<source>-<YYYY-MM-DD>-<run-type>-extracted-records-to-<action><index[optional]>"
25
    ".<extension>.  Examples: 'gisogm-2024-03-28-daily-extracted-records-to-index.jsonl' "
26
    "or 'alma-2023-01-13-full-extracted-records-to-index_17.xml'.",
27
)
28
# NOTE: FEATURE FLAG: CLI arg '--output-file' will be removed after v2 work is complete
29
@click.option(
1✔
30
    "--output-file",
31
    required=False,
32
    help="Filepath to write output TIMDEX JSON records to. NOTE: this option will be "
33
    "removed when output to parquet is finalized.",
34
)
35
@click.option(
1✔
36
    "-o",
37
    "--output-location",
38
    required=False,
39
    help="Location of TIMDEX parquet dataset to write to.",
40
)
41
@click.option(
1✔
42
    "-s",
43
    "--source",
44
    required=True,
45
    type=click.Choice(list(SOURCES.keys()), case_sensitive=False),
46
    help="Source records were harvested from, must choose from list of options",
47
)
48
@click.option(
1✔
49
    "-r",
50
    "--run-id",
51
    required=False,
52
    help="Identifier for Transmogrifier run.  This can be used to group transformed "
53
    "records produced by Transmogrifier, even if they span multiple CLI invocations.  "
54
    "If a value is not provided a UUID will be minted and used.",
55
)
56
@click.option(
1✔
57
    "-v", "--verbose", is_flag=True, help="Pass to log at debug level instead of info"
58
)
59
def main(
1✔
60
    source: str,
61
    input_file: str,
62
    output_file: str,
63
    output_location: str,
64
    run_id: str,
65
    verbose: bool,  # noqa: FBT001
66
) -> None:
67
    start_time = perf_counter()
1✔
68
    root_logger = logging.getLogger()
1✔
69
    logger.info(configure_logger(root_logger, verbose))
1✔
70
    logger.info(configure_sentry())
1✔
71
    logger.info("Running transform for source %s", source)
1✔
72

73
    transformer = Transformer.load(source, input_file, run_id=run_id)
1✔
74

75
    # NOTE: FEATURE FLAG: branching logic will be removed after v2 work is complete
76
    etl_version = get_etl_version()
1✔
77
    match etl_version:
1✔
78
        case 1:
1✔
79
            if output_file is None:
1✔
NEW
80
                message = "--output-file must be set when using ETL_VERSION=1"
×
NEW
81
                raise RuntimeError(message)
×
82
            transformer.transform_and_write_output_files(output_file)
1✔
83
        case 2:
1✔
84
            if output_location is None:
1✔
NEW
85
                message = "-o / --output-location must be set when using ETL_VERSION=2"
×
NEW
86
                raise RuntimeError(message)
×
87
            transformer.write_to_parquet_dataset(output_location)
1✔
88

89
    logger.info(
1✔
90
        (
91
            "Completed transform, total records processed: %d, "
92
            "transformed records: %d, "
93
            "skipped records: %d, "
94
            "deleted records: %d"
95
        ),
96
        transformer.processed_record_count,
97
        transformer.transformed_record_count,
98
        transformer.skipped_record_count,
99
        len(transformer.deleted_records),
100
    )
101

102
    elapsed_time = perf_counter() - start_time
1✔
103
    logger.info(
1✔
104
        "Total time to complete transform: %s", str(timedelta(seconds=elapsed_time))
105
    )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc