• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mborsetti / webchanges / 11992535834

24 Nov 2024 02:51AM UTC coverage: 75.597%. First build
11992535834

push

github

mborsetti
Version 3.27.0b2

1703 of 2575 branches covered (66.14%)

Branch coverage included in aggregate %.

29 of 40 new or added lines in 6 files covered. (72.5%)

4505 of 5637 relevant lines covered (79.92%)

6.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.98
/webchanges/cli.py
1
#!/usr/bin/env python3
2

3
"""Module containing the entry point: the function main()."""
2✔
4

5
# See config module for the command line arguments.
6

7
# The code below is subject to the license contained in the LICENSE file, which is part of the source code.
8

9
from __future__ import annotations
8✔
10

11
import logging
8✔
12
import os
8✔
13
import platform
8✔
14
import shutil
8✔
15
import signal
8✔
16
import subprocess  # noqa: S404 Consider possible security implications associated with the subprocess module.
8✔
17
import sys
8✔
18
import warnings
8✔
19
from pathlib import Path, PurePath
8✔
20

21
import platformdirs
8✔
22

23
from webchanges import __copyright__, __docs_url__, __min_python_version__, __project_name__, __version__
8✔
24
from webchanges.config import CommandConfig
8✔
25
from webchanges.util import file_ownership_checks, get_new_version_number, import_module_from_source
8✔
26

27
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Restore the default action for signal SIGPIPE ("broken pipe") so that a closed stdout pipe does not raise (see
# https://github.com/thp/urlwatch/issues/77)
if os.name != 'nt':  # Windows does not have signal.SIGPIPE
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # type: ignore[attr-defined]  # not defined in Windows
8✔
32

33

34
def python_version_warning() -> None:
    """Warn when running on the minimum Python version supported.

    If the interpreter's major.minor version equals ``__min_python_version__``, a notice is printed to stdout and the
    same text is issued as a :class:`PendingDeprecationWarning`. Nothing happens on any other version.
    """
    if sys.version_info[0:2] != __min_python_version__:
        return

    current_minor_version = '.'.join(str(n) for n in sys.version_info[0:2])
    next_minor_version = f'{__min_python_version__[0]}.{__min_python_version__[1] + 1}'
    warning = (
        f'Support for Python {current_minor_version} will be ending three years from the date Python '
        f'{next_minor_version} was released'
    )
    print(f'WARNING: {warning}\n')
    # Merely instantiating the warning class does nothing: it has to go through warnings.warn() to be issued.
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(warning, PendingDeprecationWarning, stacklevel=2)
2✔
46

47

48
def migrate_from_legacy(
    legacy_package: str,
    config_file: Path | None = None,
    jobs_file: Path | None = None,
    hooks_file: Path | None = None,
    ssdb_file: Path | None = None,
) -> None:
    """Copy the files of a legacy package to their new locations, leaving the originals in place.

    The legacy configuration (``<legacy_package>.yaml``), jobs (``urls.yaml``) and hooks (``hooks.py``) files are
    looked for in ``~/.<legacy_package>``, and the legacy database (``cache.db``) in that package's user cache
    directory. A file is copied only if a destination was given, the legacy file exists and the destination does not
    exist yet.

    :param legacy_package: The name of the legacy package to migrate (e.g. urlwatch).
    :param config_file: The new Path to the configuration file.
    :param jobs_file: The new Path to the jobs file.
    :param hooks_file: The new Path to the hooks file.
    :param ssdb_file: The new Path to the snapshot database file.
    """
    legacy_dir = Path.home().joinpath(f'.{legacy_package}')
    legacy_cache_dir = platformdirs.user_cache_path(legacy_package)

    # (legacy location, new location) pairs
    migrations = (
        (legacy_dir.joinpath(f'{legacy_package}.yaml'), config_file),
        (legacy_dir.joinpath('urls.yaml'), jobs_file),
        (legacy_dir.joinpath('hooks.py'), hooks_file),
        (legacy_cache_dir.joinpath('cache.db'), ssdb_file),
    )
    for old_file, new_file in migrations:
        if not new_file or not old_file.is_file() or new_file.is_file():
            # nothing requested, nothing to migrate, or already migrated
            continue
        new_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(old_file, new_file)
        logger.warning(f"Copied {legacy_package} '{old_file}' file to {__project_name__} '{new_file}'.")
        logger.warning(f"You can safely delete '{old_file}'.")
×
79

80

81
def setup_logger(verbose: int | None = None, log_file: Path | None = None) -> None:
    """Set up the logger.

    :param verbose: the verbosity level (1 = INFO, 2 = DEBUG, 3 or higher = NOTSET). At 2 or higher the ``DEBUG``
       environment variable is also set to turn on Playwright's verbose API logs. When ``None`` or 0 no log level is
       set and ``sys.tracebacklimit`` is set to 0.
    :param log_file: if given, a file handler for this path is passed to ``logging.basicConfig``.
    """
    log_level = None
    if verbose is not None:
        if verbose >= 2:
            # https://playwright.dev/python/docs/debug#verbose-api-logs
            os.environ['DEBUG'] = 'pw:api pytest -s'
            # a single expression so that the level chosen for 3+ cannot be overwritten by the one for 2
            log_level = 'NOTSET' if verbose >= 3 else 'DEBUG'
        elif verbose == 1:
            log_level = 'INFO'

    if not verbose:
        sys.tracebacklimit = 0

    handlers: tuple[logging.Handler, ...] | None = (logging.FileHandler(log_file),) if log_file else None

    logging.basicConfig(
        format='%(asctime)s %(module)s[%(thread)s] %(levelname)s: %(message)s',
        level=log_level,
        handlers=handlers,
    )
    logger.info(f'{__project_name__}: {__version__} {__copyright__}')
    logger.info(
        f'{platform.python_implementation()}: {platform.python_version()} '
        f'{platform.python_build()} {platform.python_compiler()}'
    )
    logger.info(f'System: {platform.platform()}')
8✔
118

119

120
def teardown_logger(verbose: int | None = None) -> None:
    """Clean up logging.

    :param verbose: the verbosity level that was in use; at 2 or higher the ``DEBUG`` environment variable is removed
       (if present).
    """
    if verbose is None or verbose < 2:
        return

    # https://playwright.dev/python/docs/debug#verbose-api-logs
    os.environ.pop('DEBUG', None)
×
129

130

131
def _expand_glob_files(filename: Path, default_path: Path, ext: str | None = None) -> list[Path]:
    """Expands a filename (which may be a glob pattern) into the list of paths it matches.

    The name is tried as given, then (if 'ext' is defined and differs from the name's suffix) with the 'ext'
    extension, then with 'jobs-' pre-pended and the 'ext' extension. Each of these is searched for as specified and,
    failing that, in the default directory. The first search yielding at least one file wins.

    :param filename: The filename or glob pattern.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: All the matches of the first successful search, or a list with only the original filename if no search
       matched a file.
    """
    candidates = [filename]

    # if ext is given, also try the filename with ext and the filename pre-pended with 'jobs-' with ext
    if ext and filename.suffix != ext:
        candidates += [
            filename.with_suffix(ext),
            filename.with_stem(f'jobs-{filename.stem}').with_suffix(ext),
        ]

    for candidate in candidates:
        # try as given
        # https://stackoverflow.com/questions/56311703/globbing-absolute-paths-with-pathlib
        root = Path(candidate.anchor)
        matches = list(root.glob(str(candidate.relative_to(candidate.anchor))))
        if any(match.is_file() for match in matches):
            return matches

        # a directory was specified (or is the current one): do not look in the default one
        if candidate.is_absolute() or Path(candidate).parent == Path.cwd():
            continue

        matches = list(default_path.glob(str(candidate)))
        if any(match.is_file() for match in matches):
            return matches

    # no matches found
    return [filename]
8✔
163

164

165
def locate_glob_files(filenames: list[Path], default_path: Path, ext: str | None = None) -> list[Path]:
    """Expands every filename (or glob pattern) and returns the resulting paths without duplicates.

    The paths are returned in the order in which they are first found, so that the result (including which path is
    first) is the same from one run to the next.

    :param filenames: The filenames or glob patterns.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The de-duplicated list of paths produced by _expand_glob_files() for each of the filenames.
    """
    # dict keys remember insertion order, whereas iterating a set of paths gives an arbitrary order
    unique_files: dict[Path, None] = {}
    for filename in filenames:
        unique_files.update(dict.fromkeys(_expand_glob_files(filename, default_path, ext)))
    return list(unique_files)
8✔
171

172

173
def locate_storage_file(filename: Path, default_path: Path, ext: str | None = None) -> Path:
    """Locates a file, looking for it as specified and then in the default directory.

    If 'ext' is defined and differs from the filename's suffix, the search is repeated with the 'ext' extension. The
    default directory is only searched when the name has no directory component.

    :param filename: The filename.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The path of the first file found, or the original filename if none is found.
    """
    candidates = [filename]

    # if ext is given, also try the filename with ext if different
    if ext and filename.suffix != ext:
        candidates.append(filename.with_suffix(ext))

    for candidate in candidates:
        # found as given
        if candidate.is_file():
            return candidate

        # a directory was specified: do not look in the default one
        if candidate.parent != PurePath('.'):
            continue

        in_default_path = default_path.joinpath(candidate)
        if in_default_path.is_file():
            return in_default_path

    # no matches found
    return filename
8✔
201

202

203
def locate_storage_files(filename_list: list[Path], default_path: Path, ext: str | None = None) -> set[Path]:
    """Locates each of the files in a list by running locate_storage_file() on it.

    :param filename_list: The list of filenames.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The set of filenames, either originals or ones with path where found and/or extension.
    """
    return {locate_storage_file(filename, default_path, ext) for filename in filename_list}
×
216

217

218
def first_run(command_config: CommandConfig) -> None:
    """Create the configuration file and the jobs file if they do not exist, telling the user how to edit them.

    The jobs file is created (at the first path in ``jobs_files``) only when none of the jobs files exists.

    :param command_config: the CommandConfig containing the command line arguments selected.
    """
    config_file = command_config.config_file
    if not config_file.is_file():
        config_file.parent.mkdir(parents=True, exist_ok=True)
        # imported here so that the storage module is only loaded when actually needed
        from webchanges.storage import YamlConfigStorage

        YamlConfigStorage.write_default_config(config_file)
        print(f'Created default config file at {config_file}')
        if not command_config.edit_config:
            print(f'> Edit it with {__project_name__} --edit-config')

    jobs_file_exists = any(jobs_file.is_file() for jobs_file in command_config.jobs_files)
    if jobs_file_exists:
        return

    new_jobs_file = command_config.jobs_files[0]
    new_jobs_file.parent.mkdir(parents=True, exist_ok=True)
    new_jobs_file.write_text(f'# {__project_name__} jobs file. See {__docs_url__}en/stable/jobs.html\n')
    print(f'Created default jobs file at {new_jobs_file}')
    if not command_config.edit:
        print(f'> Edit it with {__project_name__} --edit')
8✔
239

240

241
def load_hooks(hooks_file: Path) -> None:
    """Import the hooks file as module 'hooks'.

    An ImportWarning is issued, and nothing is imported, if the path is not a file or if file_ownership_checks()
    reports any error for it.

    :param hooks_file: The Path to the hooks file.
    """
    if not hooks_file.is_file():
        warnings.warn(
            f'Hooks file not imported because {hooks_file} is not a file',
            ImportWarning,
        )
        return

    hooks_file_errors = file_ownership_checks(hooks_file)
    if not hooks_file_errors:
        logger.info(f'Importing hooks module from {hooks_file}')
        import_module_from_source('hooks', hooks_file)
        logger.info('Finished importing hooks module')
        return

    warnings.warn(
        f'Hooks file {hooks_file} not imported because '
        f" {' and '.join(hooks_file_errors)}.\n"
        f'(see {__docs_url__}en/stable/hooks.html#important-note-for-hooks-file)',
        ImportWarning,
    )
×
262

263

264
def handle_unitialized_actions(urlwatch_config: CommandConfig) -> None:
    """Runs the CLI actions that can be handled before all classes etc. are initialized (and command.py is loaded),
    for speed purposes. Each action handled terminates the program with the action's exit code.

    :param urlwatch_config: the CommandConfig containing the command line arguments selected.
    """

    def _exit(arg: str | int | None) -> None:
        """Log the exit code and terminate the program with it."""
        logger.info(f'Exiting with exit code {arg}')
        sys.exit(arg)

    def print_new_version() -> int:
        """Will print alert message if a newer version is found on PyPi.

        :return: 1 if the latest release could not be determined, 0 otherwise.
        """
        print(f'{__project_name__} {__version__}.', end='')
        new_release = get_new_version_number(timeout=2)
        if new_release:
            print(
                f'\nNew release version {new_release} is available; we recommend updating using e.g. '
                f"'pip install -U {__project_name__}'."
            )
            return 0
        if new_release == '':
            print(' You are running the latest release.')
            return 0
        # neither a version number nor an empty string
        print(' Error contacting PyPI to determine the latest release.')
        return 1

    def playwright_install_chrome() -> int:  # pragma: no cover
        """
        Replicates playwright.___main__.main() function, which is called by the playwright executable, in order to
        install the browser executable.

        :return: Playwright's executable return code.
        """
        try:
            from playwright._impl._driver import compute_driver_executable
        except ImportError:  # pragma: no cover
            raise ImportError('Python package playwright is not installed; cannot install the Chrome browser') from None

        cmd = [str(compute_driver_executable()), 'install', 'chrome']
        env = {**os.environ, 'PW_CLI_TARGET_LANG': 'python'}
        logger.info(f"Running playwright CLI: {' '.join(cmd)}")
        completed_process = subprocess.run(cmd, env=env, capture_output=True, text=True)  # noqa: S603
        return_code = completed_process.returncode
        if return_code:
            print(completed_process.stderr)
            return return_code
        if completed_process.stdout:
            logger.info(f'Success! Output of Playwright CLI: {completed_process.stdout}')
        return 0

    if urlwatch_config.check_new:
        _exit(print_new_version())

    if urlwatch_config.install_chrome:  # pragma: no cover
        _exit(playwright_install_chrome())
319

320

321
def main() -> None:  # pragma: no cover
    """The entry point run when __name__ == '__main__'.

    Works out the default file locations, migrates legacy files, parses the command line, sets up logging and then
    instantiates the storage classes and the classes that run the program.

    :raises NotImplementedError: If a `--database-engine` is specified that is not supported.
    :raises RuntimeError: If `--database-engine redis` is selected but `--cache` with a redis URI is not provided
       (NOTE(review): not raised directly in this function; presumably comes from the storage classes -- confirm).
    """
    # Make sure that PendingDeprecationWarning are displayed from all modules (otherwise only those in __main__ are)
    warnings.filterwarnings('default', category=PendingDeprecationWarning)

    # Issue deprecation warning if running on minimum version supported
    python_version_warning()

    # Path where the config, jobs and hooks files are located
    if os.name == 'nt':
        config_path = platformdirs.user_documents_path().joinpath(__project_name__)
    else:
        config_path = platformdirs.user_config_path(__project_name__)  # typically ~/.config/{__project_name__}

    # Path where the snapshot database is located; typically ~/.local/share/{__project_name__} or
    # $XDG_DATA_HOME/{__project_name__} in linux, ~/Library/Application Support/webchanges in macOS and
    # %LOCALAPPDATA%\{__project_name__}\{__project_name__} in Windows
    data_path = platformdirs.user_data_path(__project_name__, __project_name__.capitalize())

    # Default config, jobs, hooks and ssdb (database) files
    default_config_file = config_path.joinpath('config.yaml')
    default_jobs_file = config_path.joinpath('jobs.yaml')
    default_hooks_file = config_path.joinpath('hooks.py')
    default_ssdb_file = data_path.joinpath('snapshots.db')

    # Check for and if found migrate snapshot database file from version <= 3.21, which was called cache.db and located
    # in user_cache_path
    migrate_from_legacy('webchanges', ssdb_file=default_ssdb_file)

    # Check for and if found migrate legacy (urlwatch) files
    migrate_from_legacy('urlwatch', default_config_file, default_jobs_file, default_hooks_file, default_ssdb_file)

    # Parse command line arguments
    command_config = CommandConfig(
        sys.argv[1:],
        config_path,
        default_config_file,
        default_jobs_file,
        default_hooks_file,
        default_ssdb_file,
    )

    # Set up the logger to verbose if needed
    setup_logger(command_config.verbose, command_config.log_file)

    # For speed, run these here
    handle_unitialized_actions(command_config)

    # Only now, after configuring logging, we can load other modules
    from webchanges.command import UrlwatchCommand
    from webchanges.main import Urlwatch
    from webchanges.storage import (
        SsdbDirStorage,
        SsdbRedisStorage,
        SsdbSQLite3Storage,
        SsdbStorage,
        YamlConfigStorage,
        YamlJobsStorage,
    )

    # Locate config, job and hooks files
    search_path = command_config.config_path
    command_config.config_file = locate_storage_file(command_config.config_file, search_path, '.yaml')
    command_config.jobs_files = locate_glob_files(command_config.jobs_files, search_path, '.yaml')
    command_config.hooks_files = locate_glob_files(command_config.hooks_files, search_path, '.py')

    # Check for first run
    if command_config.config_file == default_config_file and not Path(command_config.config_file).is_file():
        first_run(command_config)

    # Setup config file API
    config_storage = YamlConfigStorage(command_config.config_file)  # storage.py

    # load config (which for syntax checking requires hooks to be loaded too)
    for hooks_file in command_config.hooks_files or ():
        load_hooks(hooks_file)
    config_storage.load()

    # Setup database API; command line arguments take precedence over the configuration file
    database_config = config_storage.config.get('database', {})
    # "or 'sqlite3'" is not needed except for a mypy bug; same for the "or 4" below
    database_engine = command_config.database_engine or database_config.get('engine') or 'sqlite3'
    max_snapshots = command_config.max_snapshots or database_config.get('max_snapshots') or 4

    ssdb_storage: SsdbStorage
    if database_engine == 'sqlite3':
        ssdb_storage = SsdbSQLite3Storage(command_config.ssdb_file, max_snapshots)  # storage.py
    elif str(command_config.ssdb_file).startswith(('redis://', 'rediss://')):
        ssdb_storage = SsdbRedisStorage(command_config.ssdb_file)  # storage.py
    elif database_engine.startswith('redis'):
        ssdb_storage = SsdbRedisStorage(database_engine)
    elif database_engine == 'textfiles':
        ssdb_storage = SsdbDirStorage(command_config.ssdb_file)  # storage.py
    elif database_engine == 'minidb':
        # legacy code imported only if needed (requires minidb, which is not a dependency)
        from webchanges.storage_minidb import SsdbMiniDBStorage

        ssdb_storage = SsdbMiniDBStorage(command_config.ssdb_file)  # storage.py
    else:
        raise NotImplementedError(f'Database engine {database_engine} not implemented')

    # Setup jobs file API
    jobs_storage = YamlJobsStorage(command_config.jobs_files)  # storage.py

    # Setup 'webchanges'
    urlwatcher = Urlwatch(command_config, config_storage, ssdb_storage, jobs_storage)  # main.py
    urlwatch_command = UrlwatchCommand(urlwatcher)  # command.py

    # Run 'webchanges', starting with processing command line arguments
    urlwatch_command.run()

    # Remove Playwright debug mode if there
    teardown_logger(command_config.verbose)
438

439

440
# Run the command line interface only when this module is executed as a script (not when it is imported)
if __name__ == '__main__':
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc