• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mborsetti / webchanges / 11226808835

08 Oct 2024 01:21AM UTC coverage: 77.666% (-0.1%) from 77.792%
11226808835

push

github

mborsetti
Version 3.26.0rc0

1751 of 2524 branches covered (69.37%)

Branch coverage included in aggregate %.

4477 of 5495 relevant lines covered (81.47%)

4.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.96
/webchanges/cli.py
1
#!/usr/bin/env python3
2

3
"""Module containing the entry point: the function main()."""
4

5
# See config module for the command line arguments.
6

7
# The code below is subject to the license contained in the LICENSE file, which is part of the source code.
8

9
from __future__ import annotations
6✔
10

11
import logging
6✔
12
import os
6✔
13
import platform
6✔
14
import shutil
6✔
15
import signal
6✔
16
import subprocess  # noqa: S404 Consider possible security implications associated with the subprocess module.
6✔
17
import sys
6✔
18
import warnings
6✔
19
from pathlib import Path, PurePath
6✔
20

21
import platformdirs
6✔
22

23
from webchanges import __copyright__, __docs_url__, __min_python_version__, __project_name__, __version__
6✔
24
from webchanges.config import CommandConfig
6✔
25
from webchanges.util import file_ownership_checks, get_new_version_number, import_module_from_source
6✔
26

27
# Reset SIGPIPE ("broken pipe") to the operating system's default action, so that a closed stdout pipe is handled by
# the OS rather than by Python (see https://github.com/thp/urlwatch/issues/77)
if os.name != 'nt':  # Windows does not have signal.SIGPIPE
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # type: ignore[attr-defined]  # not defined in Windows

# Module-level logger, named after this module
logger = logging.getLogger(__name__)
32

33

34
def python_version_warning() -> None:
    """Warn when running on the minimum Python version supported.

    If the interpreter's ``major.minor`` version equals ``__min_python_version__``, a notice is printed to stdout and
    a ``PendingDeprecationWarning`` is issued through the ``warnings`` module. Does nothing on any other version.
    """
    if sys.version_info[0:2] != __min_python_version__:
        return

    current_minor_version = '.'.join(str(n) for n in sys.version_info[0:2])
    next_minor_version = f'{__min_python_version__[0]}.{__min_python_version__[1] + 1}'
    warning = (
        f'Support for Python {current_minor_version} will be ending three years from the date Python '
        f'{next_minor_version} was released'
    )
    print(f'WARNING: {warning}\n')
    # Merely instantiating PendingDeprecationWarning builds an exception object and throws it away; the warning is
    # only actually issued (and subject to the active warning filters) when passed to warnings.warn().
    warnings.warn(warning, PendingDeprecationWarning, stacklevel=2)
46

47

48
def migrate_from_legacy(
    legacy_package: str,
    config_file: Path | None = None,
    jobs_file: Path | None = None,
    hooks_file: Path | None = None,
    ssdb_file: Path | None = None,
) -> None:
    """Copy legacy configuration, jobs, hooks and snapshot database files to their new locations.

    A legacy file is copied only when a new location was given for it, the legacy file exists and nothing exists yet
    at the new location. Legacy files are never deleted.

    :param legacy_package: The name of the legacy package to migrate (e.g. urlwatch).
    :param config_file: The new Path to the configuration file.
    :param jobs_file: The new Path to the jobs file.
    :param hooks_file: The new Path to the hooks file.
    :param ssdb_file: The new Path to the snapshot database file.
    """
    legacy_dir = Path.home().joinpath(f'.{legacy_package}')
    legacy_cache_dir = platformdirs.user_cache_path(legacy_package)

    # (legacy location, new location) pairs
    migrations = (
        (legacy_dir.joinpath(f'{legacy_package}.yaml'), config_file),
        (legacy_dir.joinpath('urls.yaml'), jobs_file),
        (legacy_dir.joinpath('hooks.py'), hooks_file),
        (legacy_cache_dir.joinpath('cache.db'), ssdb_file),
    )
    for source, destination in migrations:
        # skip when no destination was requested, there is nothing to copy, or the destination already exists
        if not destination or not source.is_file() or destination.is_file():
            continue
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(source, destination)
        logger.warning(f"Copied {legacy_package} '{source}' file to {__project_name__} '{destination}'.")
        logger.warning(f"You can safely delete '{source}'.")
79

80

81
def setup_logger(verbose: int | None = None) -> None:
    """Set up the logger.

    Configures the root logger through ``logging.basicConfig`` and logs the program, interpreter and platform
    details at INFO level. When ``verbose`` is ``None`` or 0, tracebacks are suppressed by setting
    ``sys.tracebacklimit`` to 0.

    :param verbose: the verbosity level (1 = INFO, 2 = DEBUG, 3 or more = NOTSET). With 2 or more, the ``DEBUG``
       environment variable is also set to turn on Playwright's verbose API logs.
    """
    log_level = None
    if verbose is not None:
        # The branches must be mutually exclusive: with two independent ``if`` statements the 'NOTSET' chosen for
        # verbose >= 3 was immediately overwritten by 'DEBUG'.
        if verbose >= 3:
            log_level = 'NOTSET'
        elif verbose == 2:
            log_level = 'DEBUG'
        elif verbose == 1:
            log_level = 'INFO'

        if verbose >= 2:
            # https://playwright.dev/python/docs/debug#verbose-api-logs
            os.environ['DEBUG'] = 'pw:api pytest -s'

    if not verbose:
        sys.tracebacklimit = 0

    logging.basicConfig(format='%(asctime)s %(module)s[%(thread)s] %(levelname)s: %(message)s', level=log_level)
    logger.info(f'{__project_name__}: {__version__} {__copyright__}')
    logger.info(
        f'{platform.python_implementation()}: {platform.python_version()} '
        f'{platform.python_build()} {platform.python_compiler()}'
    )
    logger.info(f'System: {platform.platform()}')
109

110

111
def teardown_logger(verbose: int | None = None) -> None:
    """Clean up logging.

    Removes the ``DEBUG`` environment variable (used for Playwright's verbose API logs) when the verbosity level is
    2 or higher; does nothing otherwise.

    :param verbose: the verbosity level that the logger was set up with.
    """
    if verbose is None or verbose < 2:
        return
    # https://playwright.dev/python/docs/debug#verbose-api-logs
    os.environ.pop('DEBUG', None)
120

121

122
def _expand_glob_files(filename: Path, default_path: Path, ext: str | None = None) -> list[Path]:
6✔
123
    """Searches for file both as specified and in the default directory, then retries with 'ext' extension if defined.
124

125
    :param filename: The filename.
126
    :param default_path: The default directory.
127
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.
128

129
    :returns: The filename, either original or one with path where found and/or extension.
130
    """
131
    search_filenames = [filename]
6✔
132

133
    # if ext is given, iterate both on raw filename and the filename with ext if different
134
    if ext and filename.suffix != ext:
6!
135
        search_filenames.append(filename.with_suffix(ext))
6✔
136
        # also iterate on file pre-pended with 'jobs-'
137
        search_filenames.append(filename.with_stem(f'jobs-{filename.stem}').with_suffix(ext))
6✔
138

139
    # try as given
140
    for file in search_filenames:
6✔
141
        # https://stackoverflow.com/questions/56311703/globbing-absolute-paths-with-pathlib
142
        file_list = list(Path(file.anchor).glob(str(file.relative_to(file.anchor))))
6✔
143
        if any(f.is_file() for f in file_list):
6!
144
            return file_list
×
145

146
        # no directory specified (and not in current one): add default one
147
        if not file.is_absolute() and not Path(file).parent == Path.cwd():
6!
148
            file_list = list(default_path.glob(str(file)))
6✔
149
            if any(f.is_file() for f in file_list):
6!
150
                return file_list
×
151

152
    # no matches found
153
    return [filename]
6✔
154

155

156
def locate_glob_files(filenames: list[Path], default_path: Path, ext: str | None = None) -> list[Path]:
    """Expand every filename with ``_expand_glob_files`` and return the combined matches without duplicates.

    Duplicates are dropped while keeping first-seen order, so the result follows the order of ``filenames`` (and,
    within each, the order returned by ``_expand_glob_files``). De-duplicating through a ``set`` made the order
    depend on hash values, which can differ from one run to the next.

    :param filenames: The list of filenames (may contain glob wildcards).
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The list of unique paths found.
    """
    # dict keys are unique and keep insertion order
    return list(
        dict.fromkeys(file for filename in filenames for file in _expand_glob_files(filename, default_path, ext))
    )
162

163

164
def locate_storage_file(filename: Path, default_path: Path, ext: str | None = None) -> Path:
    """Find an existing file for ``filename``, looking also in the default directory and with ``ext`` added.

    The locations are probed in this order: the filename as given; the filename inside ``default_path`` (only when
    the filename has no directory part); then, if ``ext`` is given and differs from the filename's suffix, the same
    two locations for the filename with its suffix replaced by ``ext``.

    :param filename: The filename.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The first probed location that is a file, or the original filename if none is.
    """
    if ext and filename.suffix != ext:
        candidates = [filename, filename.with_suffix(ext)]
    else:
        candidates = [filename]

    probes = []
    for candidate in candidates:
        probes.append(candidate)
        # no directory specified: also probe the default one
        if candidate.parent == PurePath('.'):
            probes.append(default_path.joinpath(candidate))

    # first existing file wins; fall back to the name as given
    return next((probe for probe in probes if probe.is_file()), filename)
192

193

194
def locate_storage_files(filename_list: list[Path], default_path: Path, ext: str | None = None) -> set[Path]:
    """Run ``locate_storage_file`` on every filename in a list and collect the results.

    :param filename_list: The list of filenames.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The set of filenames, either originals or ones with path where found and/or extension.
    """
    return {locate_storage_file(filename, default_path, ext) for filename in filename_list}
207

208

209
def first_run(command_config: CommandConfig) -> None:
    """Create the default configuration file and a stub jobs file where they are missing.

    The configuration file is written through ``YamlConfigStorage.write_default_config`` if it is not a file. If
    none of the jobs files is a file, the first one is created with a single comment line. A message is printed for
    each file created, with an editing hint unless the corresponding edit option is already set.

    :param command_config: the CommandConfig containing the command line arguments selected.
    """
    config_file = command_config.config_file
    if not config_file.is_file():
        config_file.parent.mkdir(parents=True, exist_ok=True)
        from webchanges.storage import YamlConfigStorage

        YamlConfigStorage.write_default_config(config_file)
        print(f'Created default config file at {config_file}')
        if not command_config.edit_config:
            print(f'> Edit it with {__project_name__} --edit-config')

    jobs_files = command_config.jobs_files
    if any(f.is_file() for f in jobs_files):
        return

    # no jobs file exists at all: create the first one listed
    new_jobs_file = jobs_files[0]
    new_jobs_file.parent.mkdir(parents=True, exist_ok=True)
    new_jobs_file.write_text(f'# {__project_name__} jobs file. See {__docs_url__}en/stable/jobs.html\n')
    print(f'Created default jobs file at {new_jobs_file}')
    if not command_config.edit:
        print(f'> Edit it with {__project_name__} --edit')
230

231

232
def load_hooks(hooks_file: Path) -> None:
    """Import the hooks module from a file, or warn about why it was not imported.

    An ``ImportWarning`` is issued (and nothing is imported) when ``hooks_file`` is not a file or when
    ``file_ownership_checks`` reports errors for it.

    :param hooks_file: The Path to the hooks file.
    """
    if not hooks_file.is_file():
        warnings.warn(f'Hooks file not imported because {hooks_file} is not a file', ImportWarning)
        return

    ownership_errors = file_ownership_checks(hooks_file)
    if not ownership_errors:
        logger.info(f'Importing hooks module from {hooks_file}')
        import_module_from_source('hooks', hooks_file)
        logger.info('Finished importing hooks module')
        return

    reasons = ' and '.join(ownership_errors)
    warnings.warn(
        f'Hooks file {hooks_file} not imported because '
        f' {reasons}.\n'
        f'(see {__docs_url__}en/stable/hooks.html#important-note-for-hooks-file)',
        ImportWarning,
    )
253

254

255
def handle_unitialized_actions(urlwatch_config: CommandConfig) -> None:
    """Run the CLI actions that can be served before the rest of the program (and command.py) is loaded, for speed.

    If ``check_new`` is set, the new-version check is run; if ``install_chrome`` is set, Playwright's Chrome
    installation is run. Either one terminates the program through ``sys.exit`` with the action's return code.

    :param urlwatch_config: the CommandConfig containing the command line arguments selected.
    """

    def _exit(arg: str | int | None) -> None:
        logger.info(f'Exiting with exit code {arg}')
        sys.exit(arg)

    def print_new_version() -> int:
        """Will print alert message if a newer version is found on PyPi."""
        print(f'{__project_name__} {__version__}.', end='')
        latest = get_new_version_number(timeout=2)
        if latest:
            print(
                f'\nNew release version {latest} is available; we recommend updating using e.g. '
                f"'pip install -U {__project_name__}'."
            )
            return 0
        if latest == '':
            # an empty string is treated as "nothing newer available"
            print(' You are running the latest release.')
            return 0
        print(' Error contacting PyPI to determine the latest release.')
        return 1

    def playwright_install_chrome() -> int:  # pragma: no cover
        """
        Replicates playwright.___main__.main() function, which is called by the playwright executable, in order to
        install the browser executable.

        :return: Playwright's executable return code.
        """
        try:
            from playwright._impl._driver import compute_driver_executable
        except ImportError:  # pragma: no cover
            raise ImportError('Python package playwright is not installed; cannot install the Chrome browser') from None

        cmd = [str(compute_driver_executable()), 'install', 'chrome']
        env = os.environ.copy()
        env['PW_CLI_TARGET_LANG'] = 'python'
        logger.info(f"Running playwright CLI: {' '.join(cmd)}")
        outcome = subprocess.run(cmd, env=env, capture_output=True, text=True)  # noqa: S603
        if outcome.returncode:
            print(outcome.stderr)
            return outcome.returncode
        if outcome.stdout:
            logger.info(f'Success! Output of Playwright CLI: {outcome.stdout}')
        return 0

    if urlwatch_config.check_new:
        _exit(print_new_version())

    if urlwatch_config.install_chrome:  # pragma: no cover
        _exit(playwright_install_chrome())
310

311

312
def main() -> None:  # pragma: no cover
    """The entry point run when __name__ == '__main__'.

    Works out the default file locations, migrates legacy files, parses the command line, sets up logging and only
    then imports and instantiates the storage, jobs and command classes that run the program.

    :raises NotImplementedError: If a `--database-engine` is specified that is not supported.
    :raises RuntimeError: If `--database-engine redis` is selected but `--cache` with a redis URI is not provided
       (not raised directly in this function; presumably raised by the storage classes -- to be confirmed).
    """
    # Make sure that PendingDeprecationWarning are displayed from all modules (otherwise only those in __main__ are)
    warnings.filterwarnings('default', category=PendingDeprecationWarning)

    # Issue deprecation warning if running on minimum version supported
    python_version_warning()

    # Directory holding the config, jobs and hooks files: the user's documents folder in Windows, the user's config
    # folder (typically ~/.config/{__project_name__}) everywhere else
    config_path = (
        platformdirs.user_documents_path().joinpath(__project_name__)
        if os.name == 'nt'
        else platformdirs.user_config_path(__project_name__)
    )

    # Directory holding the snapshot database; typically ~/.local/share/{__project_name__} or
    # $XDG_DATA_HOME/{__project_name__} in linux, ~/Library/Application Support/webchanges in macOS and
    # %LOCALAPPDATA%\{__project_name__}\{__project_name__} in Windows
    data_path = platformdirs.user_data_path(__project_name__, __project_name__.capitalize())

    # Default config, jobs, hooks and ssdb (database) files
    default_config_file = config_path.joinpath('config.yaml')
    default_jobs_file = config_path.joinpath('jobs.yaml')
    default_hooks_file = config_path.joinpath('hooks.py')
    default_ssdb_file = data_path.joinpath('snapshots.db')

    # Migrate, if found, the snapshot database file from version <= 3.21, which was called cache.db and located in
    # user_cache_path
    migrate_from_legacy('webchanges', ssdb_file=default_ssdb_file)

    # Migrate, if found, the legacy (urlwatch) files
    migrate_from_legacy('urlwatch', default_config_file, default_jobs_file, default_hooks_file, default_ssdb_file)

    # Parse command line arguments
    command_config = CommandConfig(
        sys.argv[1:],
        config_path,
        default_config_file,
        default_jobs_file,
        default_hooks_file,
        default_ssdb_file,
    )

    # Set up the logger to verbose if needed
    setup_logger(command_config.verbose)

    # For speed, run these here
    handle_unitialized_actions(command_config)

    # Only now, after configuring logging, we can load other modules
    from webchanges.command import UrlwatchCommand
    from webchanges.main import Urlwatch
    from webchanges.storage import (
        SsdbDirStorage,
        SsdbRedisStorage,
        SsdbSQLite3Storage,
        SsdbStorage,
        YamlConfigStorage,
        YamlJobsStorage,
    )

    # Locate config, job and hooks files
    search_path = command_config.config_path
    command_config.config_file = locate_storage_file(command_config.config_file, search_path, '.yaml')
    command_config.jobs_files = locate_glob_files(command_config.jobs_files, search_path, '.yaml')
    command_config.hooks_files = locate_glob_files(command_config.hooks_files, search_path, '.py')

    # Check for first run
    if command_config.config_file == default_config_file and not Path(command_config.config_file).is_file():
        first_run(command_config)

    # Setup config file API
    config_storage = YamlConfigStorage(command_config.config_file)  # storage.py

    # load config (which for syntax checking requires hooks to be loaded too)
    for hooks_file in command_config.hooks_files or ():
        load_hooks(hooks_file)
    config_storage.load()

    # Setup database API
    database_engine = (
        command_config.database_engine or config_storage.config.get('database', {}).get('engine') or 'sqlite3'
    )  # "or 'sqlite3'" is not needed except for a mypy bug; same for the "or 4" below
    max_snapshots = command_config.max_snapshots or config_storage.config.get('database', {}).get('max_snapshots') or 4
    if database_engine == 'sqlite3':
        ssdb_storage: SsdbStorage = SsdbSQLite3Storage(command_config.ssdb_file, max_snapshots)  # storage.py
    elif str(command_config.ssdb_file).startswith(('redis://', 'rediss://')):
        ssdb_storage = SsdbRedisStorage(command_config.ssdb_file)  # storage.py
    elif database_engine.startswith('redis'):
        ssdb_storage = SsdbRedisStorage(database_engine)
    elif database_engine == 'textfiles':
        ssdb_storage = SsdbDirStorage(command_config.ssdb_file)  # storage.py
    elif database_engine == 'minidb':
        # legacy code imported only if needed (requires minidb, which is not a dependency)
        from webchanges.storage_minidb import SsdbMiniDBStorage

        ssdb_storage = SsdbMiniDBStorage(command_config.ssdb_file)  # storage.py
    else:
        raise NotImplementedError(f'Database engine {database_engine} not implemented')

    # Setup jobs file API
    jobs_storage = YamlJobsStorage(command_config.jobs_files)  # storage.py

    # Setup 'webchanges'
    urlwatcher = Urlwatch(command_config, config_storage, ssdb_storage, jobs_storage)  # main.py
    urlwatch_command = UrlwatchCommand(urlwatcher)  # command.py

    # Run 'webchanges', starting with processing command line arguments
    urlwatch_command.run()

    # Remove Playwright debug mode if there
    teardown_logger(command_config.verbose)
429

430

431
if __name__ == '__main__':
432
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc