mborsetti / webchanges — build 16871375955 (push, github), 11 Aug 2025 05:06AM UTC, version 3.31.1rc0
Repository coverage: 72.561% (-1.9% from 74.431%); 1749 of 2772 branches covered (63.1%, included in the aggregate %); 4574 of 5942 relevant lines covered (76.98%); 5.96 hits per line.

Source file: /webchanges/cli.py — 62.93% covered

#!/usr/bin/env python3

"""Module containing the entry point: the function main()."""

# See config module for the command line arguments.

# The code below is subject to the license contained in the LICENSE file, which is part of the source code.

from __future__ import annotations

import logging
import os
import platform
import shutil
import signal
import subprocess
import sys
import warnings
from pathlib import Path, PurePath

import platformdirs

from webchanges import __copyright__, __docs_url__, __min_python_version__, __project_name__, __version__
from webchanges.config import CommandConfig
from webchanges.util import file_ownership_checks, get_new_version_number, import_module_from_source

# Ignore signal SIGPIPE ("broken pipe") for stdout (see https://github.com/thp/urlwatch/issues/77)
if os.name != 'nt':  # Windows does not have signal.SIGPIPE
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # type: ignore[attr-defined]  # not defined in Windows

logger = logging.getLogger(__name__)


def python_version_warning() -> None:
    """Check if we're running on the minimum supported Python version and, if so, print and issue a
    PendingDeprecationWarning."""
    if sys.version_info[0:2] == __min_python_version__:
        current_minor_version = '.'.join(str(n) for n in sys.version_info[0:2])
        next_minor_version = f'{__min_python_version__[0]}.{__min_python_version__[1] + 1}'
        warning = (
            f'Support for Python {current_minor_version} will be ending three years from the date Python '
            f'{next_minor_version} was released'
        )
        print(f'WARNING: {warning}\n')
        warnings.warn(warning, PendingDeprecationWarning)


def migrate_from_legacy(
    legacy_package: str,
    config_file: Path | None = None,
    jobs_file: Path | None = None,
    hooks_file: Path | None = None,
    ssdb_file: Path | None = None,
) -> None:
    """Check for the existence of legacy configuration, jobs and Python hooks files and migrate them (i.e. make a copy
    to the new folder and/or name). Original files are not deleted.

    :param legacy_package: The name of the legacy package to migrate (e.g. urlwatch).
    :param config_file: The new Path to the configuration file.
    :param jobs_file: The new Path to the jobs file.
    :param hooks_file: The new Path to the hooks file.
    :param ssdb_file: The new Path to the snapshot database file.
    """
    legacy_project_path = Path.home().joinpath(f'.{legacy_package}')
    legacy_config_file = legacy_project_path.joinpath(f'{legacy_package}.yaml')
    legacy_urls_file = legacy_project_path.joinpath('urls.yaml')
    legacy_hooks_file = legacy_project_path.joinpath('hooks.py')
    legacy_cache_path = platformdirs.user_cache_path(legacy_package)
    legacy_cache_file = legacy_cache_path.joinpath('cache.db')
    for old_file, new_file in zip(
        (legacy_config_file, legacy_urls_file, legacy_hooks_file, legacy_cache_file),
        (config_file, jobs_file, hooks_file, ssdb_file),
        strict=False,
    ):
        if new_file and old_file.is_file() and not new_file.is_file():
            new_file.parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(old_file, new_file)
            logger.warning(f"Copied {legacy_package} '{old_file}' file to {__project_name__} '{new_file}'.")
            logger.warning(f"You can safely delete '{old_file}'.")


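# Editorial note, not part of the original module: a sketch of the mapping migrate_from_legacy() above
# performs when called with legacy_package='urlwatch' and the default paths set in main() (assumed here):
#     ~/.urlwatch/urlwatch.yaml          -> <config_path>/config.yaml
#     ~/.urlwatch/urls.yaml              -> <config_path>/jobs.yaml
#     ~/.urlwatch/hooks.py               -> <config_path>/hooks.py
#     <user cache dir>/urlwatch/cache.db -> <data_path>/snapshots.db
# Files are only copied when the destination does not already exist; originals are never deleted.
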
def setup_logger(verbose: int | None = None, log_file: Path | None = None) -> None:
    """Set up the logger.

    :param verbose: the verbosity level (1 = INFO, 2 = DEBUG, 3 = NOTSET).
    :param log_file: the path of a file to log to instead of the console.
    """
    if log_file:
        handlers: tuple[logging.Handler, ...] | None = (logging.FileHandler(log_file),)
        if not verbose:
            verbose = 1
    else:
        handlers = None

    log_level = None

    if verbose is not None:
        if verbose >= 3:
            log_level = 'NOTSET'
            # https://playwright.dev/python/docs/debug#verbose-api-logs
            os.environ['DEBUG'] = 'pw:api pytest -s'
        elif verbose >= 2:
            log_level = 'DEBUG'
            # https://playwright.dev/python/docs/debug#verbose-api-logs
            os.environ['DEBUG'] = 'pw:api pytest -s'
        elif verbose == 1:
            log_level = 'INFO'

    # if not verbose:
    #     sys.tracebacklimit = 0

    logging.basicConfig(
        format='%(asctime)s %(module)s[%(thread)s] %(levelname)s: %(message)s',
        level=log_level,
        handlers=handlers,
    )
    logger.info(f'{__project_name__}: {__version__} {__copyright__}')
    logger.info(
        f'{platform.python_implementation()}: {platform.python_version()} '
        f'{platform.python_build()} {platform.python_compiler()}'
    )
    logger.info(f'System: {platform.platform()}')


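# Editorial note, not part of the original module: how setup_logger() above maps the verbosity count
# (assumed to come from a repeatable -v/--verbose command line flag defined in the config module) to levels:
#     None      -> logging defaults (level not set), unless a log file is given, in which case it becomes 1
#     1         -> INFO
#     2         -> DEBUG, with Playwright debug output enabled via the DEBUG environment variable
#     3 or more -> NOTSET (everything), also with Playwright debug output enabled
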
def teardown_logger(verbose: int | None = None) -> None:
    """Clean up logging.

    :param verbose: the verbosity level (1 = INFO, 2 = DEBUG).
    """
    if verbose is not None:
        if verbose >= 2:
            # https://playwright.dev/python/docs/debug#verbose-api-logs
            os.environ.pop('DEBUG', None)


def _expand_glob_files(
    filename: Path,
    default_path: Path,
    ext: str | None = None,
    prefix: str | None = None,
) -> list[Path]:
    """Searches for a file both as specified and in the default directory, then retries with the 'ext' extension
    and/or the 'prefix' prefix if defined.

    :param filename: The filename.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if the first scan fails.
    :param prefix: The prefix, e.g. 'config', to add with a hyphen (e.g. 'config-') for searching if the first scan
        fails.

    :returns: A list of the files found, or a single-item list with the original filename if no matches are found.
    """
    search_filenames = [filename]

    # if ext is given, iterate both on the raw filename and on the filename with ext, if different
    if ext and filename.suffix != ext:
        search_filenames.append(filename.with_suffix(ext))

    # if prefix is given, iterate both on the raw filename and on the filename with prefix, if different
    if prefix and not filename.name.startswith(prefix):
        search_filenames.append(filename.with_stem(f'{prefix}-{filename.stem}'))
        if ext and filename.suffix != ext:
            search_filenames.append(filename.with_stem(f'{prefix}-{filename.stem}').with_suffix(ext))

    # try as given
    for file in search_filenames:
        # https://stackoverflow.com/questions/56311703/globbing-absolute-paths-with-pathlib
        file_list = list(Path(file.anchor).glob(str(file.relative_to(file.anchor))))
        if any(f.is_file() for f in file_list):
            return file_list

        # no directory specified (and not in the current one): try the default one
        if not file.is_absolute() and not Path(file).parent == Path.cwd():
            file_list = list(default_path.glob(str(file)))
            if any(f.is_file() for f in file_list):
                return file_list

    # no matches found
    return [filename]


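# Editorial note, not part of the original module: for a hypothetical call such as
#     _expand_glob_files(Path('mysite'), default_path, ext='.yaml', prefix='jobs')
# the candidate patterns tried above are, in order: 'mysite', 'mysite.yaml', 'jobs-mysite' and
# 'jobs-mysite.yaml'; each is globbed as given and, if it is a relative path, also under default_path.
# The first pattern matching at least one existing file wins; otherwise the original filename is
# returned unchanged as a single-item list.
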
def locate_glob_files(
    filenames: list[Path],
    default_path: Path,
    ext: str | None = None,
    prefix: str | None = None,
) -> list[Path]:
    """Expand each filename (which may be a glob pattern) using _expand_glob_files() and return the deduplicated
    results."""
    job_files = set()
    for filename in filenames:
        for file in _expand_glob_files(filename, default_path, ext, prefix):
            job_files.add(file)
    return list(job_files)


def locate_storage_file(
    filename: Path,
    default_path: Path,
    ext: str | None = None,
    prefix: str | None = None,
) -> Path:
    """Searches for a file both as specified and in the default directory, then retries with the 'ext' extension
    and/or the 'prefix' prefix if defined.

    :param filename: The filename.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if the first scan fails.
    :param prefix: The prefix, e.g. 'config', to add with a hyphen (e.g. 'config-') for searching if the first scan
        fails.

    :returns: The filename, either the original or one with the path where found and/or the extension.
    """
    search_filenames = [filename]

    # if ext is given, iterate both on the raw filename and on the filename with ext, if different
    if ext and filename.suffix != ext:
        search_filenames.append(filename.with_suffix(ext))

    # if prefix is given, iterate both on the raw filename and on the filename with prefix, if different
    if prefix and not filename.name.startswith(prefix):
        search_filenames.append(filename.with_stem(f'{prefix}-{filename.stem}'))
        if ext and filename.suffix != ext:
            search_filenames.append(filename.with_stem(f'{prefix}-{filename.stem}').with_suffix(ext))

    for file in search_filenames:
        # return if found
        if file.is_file():
            return file

        # no directory specified (and not in the current one): try the default one
        if file.parent == PurePath('.'):
            new_file = default_path.joinpath(file)
            if new_file.is_file():
                return new_file

    # no matches found
    return filename


def locate_storage_files(
    filename_list: list[Path],
    default_path: Path,
    ext: str | None = None,
    prefix: str | None = None,
) -> set[Path]:
    """Searches for each file both as specified and in the default directory, then retries with the 'ext' extension
    and/or the 'prefix' prefix if defined.

    :param filename_list: The list of filenames.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if the first scan fails.
    :param prefix: The prefix, e.g. 'config', to add with a hyphen (e.g. 'config-') for searching if the first scan
        fails.

    :returns: The set of filenames, either the originals or ones with the path where found and/or the extension.
    """
    filenames = set()
    for filename in filename_list:
        filenames.add(locate_storage_file(filename, default_path, ext, prefix))
    return filenames


def first_run(command_config: CommandConfig) -> None:
    """Create configuration and jobs files.

    :param command_config: the CommandConfig containing the command line arguments selected.
    """
    if not command_config.config_file.is_file():
        command_config.config_file.parent.mkdir(parents=True, exist_ok=True)
        from webchanges.storage import YamlConfigStorage

        YamlConfigStorage.write_default_config(command_config.config_file)
        print(f'Created default config file at {command_config.config_file}')
        if not command_config.edit_config:
            print(f'> Edit it with {__project_name__} --edit-config')
    if not any(f.is_file() for f in command_config.jobs_files):
        command_config.jobs_files[0].parent.mkdir(parents=True, exist_ok=True)
        command_config.jobs_files[0].write_text(
            f'# {__project_name__} jobs file. See {__docs_url__}en/stable/jobs.html\n'
        )
        print(f'Created default jobs file at {command_config.jobs_files[0]}')
        if not command_config.edit:
            print(f'> Edit it with {__project_name__} --edit')


def load_hooks(hooks_file: Path, is_default: bool = False) -> None:
    """Load the hooks file."""
    if not hooks_file.is_file():
        if is_default:
            logger.info(f'Hooks file {hooks_file} does not exist or is not a file')
        else:
            # do not use ImportWarning as it could be suppressed
            warnings.warn(
                f'Hooks file {hooks_file} not imported because it does not exist or is not a file',
                RuntimeWarning,
                stacklevel=1,
            )
        return

    hooks_file_errors = file_ownership_checks(hooks_file)
    if hooks_file_errors:
        # do not use ImportWarning as it could be suppressed
        warnings.warn(
            f'Hooks file {hooks_file} not imported because {" and ".join(hooks_file_errors)}.\n'
            f'(see {__docs_url__}en/stable/hooks.html#important-note-for-hooks-file)',
            RuntimeWarning,
            stacklevel=1,
        )
    else:
        logger.info(f'Importing into hooks module from {hooks_file}')
        import_module_from_source('hooks', hooks_file)
        logger.info('Finished importing into hooks module')


def handle_unitialized_actions(urlwatch_config: CommandConfig) -> None:
    """Handle CLI actions that do not require all classes etc. to be initialized (and command.py to be loaded), for
    speed."""

    def _exit(arg: str | int | None) -> None:
        logger.info(f'Exiting with exit code {arg}')
        sys.exit(arg)

    def print_new_version() -> int:
        """Print an alert message if a newer version is found on PyPI."""
        print(f'{__project_name__} {__version__}.', end='')
        new_release = get_new_version_number(timeout=2)
        if new_release:
            print(
                f'\nNew release version {new_release} is available; we recommend updating using e.g. '
                f"'pip install -U {__project_name__}'."
            )
            return 0
        elif new_release == '':
            print(' You are running the latest release.')
            return 0
        else:
            print(' Error contacting PyPI to determine the latest release.')
            return 1

    def playwright_install_chrome() -> int:  # pragma: no cover
        """
        Replicates the playwright.__main__.main() function, which is called by the playwright executable, in order to
        install the browser executable.

        :return: Playwright's executable return code.
        """
        try:
            from playwright._impl._driver import compute_driver_executable
        except ImportError:  # pragma: no cover
            raise ImportError('Python package playwright is not installed; cannot install the Chrome browser') from None

        driver_executable = compute_driver_executable()
        env = os.environ.copy()
        env['PW_CLI_TARGET_LANG'] = 'python'
        cmd = [str(driver_executable), 'install', 'chrome']
        logger.info(f'Running playwright CLI: {" ".join(cmd)}')
        completed_process = subprocess.run(cmd, env=env, capture_output=True, text=True)  # noqa: S603
        if completed_process.returncode:
            print(completed_process.stderr)
            return completed_process.returncode
        if completed_process.stdout:
            logger.info(f'Success! Output of Playwright CLI: {completed_process.stdout}')
        return 0

    if urlwatch_config.check_new:
        _exit(print_new_version())

    if urlwatch_config.install_chrome:  # pragma: no cover
        _exit(playwright_install_chrome())


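# Editorial note, not part of the original module: only two command line actions are fast-pathed by
# handle_unitialized_actions() above, both exiting before the heavier imports performed in main():
#     urlwatch_config.check_new      -> print_new_version()          (queries PyPI for a newer release)
#     urlwatch_config.install_chrome -> playwright_install_chrome()  (installs Chrome via the Playwright CLI)
# The corresponding flag spellings (e.g. --check-new, --install-chrome) are defined in the config module
# and are assumed here, not shown in this file.
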
def main() -> None:  # pragma: no cover
    """The entry point run when __name__ == '__main__'.

    Contains all the high-level logic to instantiate all classes that run the program.

    :raises NotImplementedError: If a `--database-engine` is specified that is not supported.
    :raises RuntimeError: If `--database-engine redis` is selected but `--cache` with a redis URI is not provided.
    """
    # Make sure that PendingDeprecationWarnings are displayed from all modules (otherwise only those in __main__ are)
    warnings.filterwarnings('default', category=PendingDeprecationWarning)

    # Issue a deprecation warning if running on the minimum supported version of Python
    python_version_warning()

    # Path where the config, jobs and hooks files are located
    if os.name != 'nt':
        config_path = platformdirs.user_config_path(__project_name__)  # typically ~/.config/{__project_name__}
    else:
        config_path = platformdirs.user_documents_path().joinpath(__project_name__)

    # Path where the snapshot database is located: typically ~/.local/share/{__project_name__} or
    # $XDG_DATA_HOME/{__project_name__} on Linux, ~/Library/Application Support/webchanges on macOS,
    # or %LOCALAPPDATA%\{__project_name__}\{__project_name__} on Windows
    data_path = platformdirs.user_data_path(__project_name__, __project_name__.capitalize())

    # Default config, jobs, hooks and ssdb (database) files
    default_config_file = config_path.joinpath('config.yaml')
    default_jobs_file = config_path.joinpath('jobs.yaml')
    default_hooks_file = config_path.joinpath('hooks.py')
    default_ssdb_file = data_path.joinpath('snapshots.db')

    # Check for a snapshot database file from version <= 3.21 (which was called cache.db and located in
    # user_cache_path) and, if found, migrate it
    migrate_from_legacy('webchanges', ssdb_file=default_ssdb_file)

    # Check for legacy (urlwatch) files and, if found, migrate them
    migrate_from_legacy('urlwatch', default_config_file, default_jobs_file, default_hooks_file, default_ssdb_file)

    # Parse command line arguments
    command_config = CommandConfig(
        sys.argv[1:],
        config_path,
        default_config_file,
        default_jobs_file,
        default_hooks_file,
        default_ssdb_file,
    )

    # Set up the logger, with verbosity if requested
    setup_logger(command_config.verbose, command_config.log_file)

    # log defaults
    logger.debug(f'Default config path is {config_path}')
    logger.debug(f'Default data path is {data_path}')

    # For speed, run these here
    handle_unitialized_actions(command_config)

    # Only now, after configuring logging, can we load the other modules
    from webchanges.command import UrlwatchCommand
    from webchanges.main import Urlwatch
    from webchanges.storage import (
        SsdbDirStorage,
        SsdbRedisStorage,
        SsdbSQLite3Storage,
        SsdbStorage,
        YamlConfigStorage,
        YamlJobsStorage,
    )

    # Locate config, jobs, hooks and database files
    command_config.config_file = locate_storage_file(
        filename=command_config.config_file,
        default_path=command_config.config_path,
        ext='.yaml',
        prefix='config',
    )
    command_config.jobs_files = locate_glob_files(
        filenames=command_config.jobs_files,
        default_path=command_config.config_path,
        ext='.yaml',
        prefix='jobs',
    )
    command_config.hooks_files = locate_glob_files(
        filenames=command_config.hooks_files,
        default_path=command_config.config_path,
        ext='.py',
        prefix='hooks',
    )
    command_config.ssdb_file = locate_storage_file(
        filename=command_config.ssdb_file,
        default_path=data_path,
        ext='.db',
    )

    # Check for first run
    if command_config.config_file == default_config_file and not Path(command_config.config_file).is_file():
        first_run(command_config)

    # Set up the config file API
    config_storage = YamlConfigStorage(command_config.config_file)  # storage.py

    # Load the config (which, for syntax checking, requires hooks to be loaded too)
    if command_config.hooks_files:
        logger.debug(f'Hooks files to be loaded: {command_config.hooks_files}')
        for hooks_file in command_config.hooks_files:
            load_hooks(hooks_file, is_default=not command_config.hooks_files_inputted)
    config_storage.load()

    # Set up the database API
    database_engine = (
        command_config.database_engine or config_storage.config.get('database', {}).get('engine') or 'sqlite3'
    )  # "or 'sqlite3'" is not needed except for a mypy bug; same for the "or 4" below
    max_snapshots = command_config.max_snapshots or config_storage.config.get('database', {}).get('max_snapshots') or 4
    if database_engine == 'sqlite3':
        ssdb_storage: SsdbStorage = SsdbSQLite3Storage(command_config.ssdb_file, max_snapshots)  # storage.py
    elif any(str(command_config.ssdb_file).startswith(prefix) for prefix in {'redis://', 'rediss://'}):
        ssdb_storage = SsdbRedisStorage(command_config.ssdb_file)  # storage.py
    elif database_engine.startswith('redis'):
        ssdb_storage = SsdbRedisStorage(database_engine)
    elif database_engine == 'textfiles':
        ssdb_storage = SsdbDirStorage(command_config.ssdb_file)  # storage.py
    elif database_engine == 'minidb':
        # legacy code, imported only if needed (requires minidb, which is not a dependency)
        from webchanges.storage_minidb import SsdbMiniDBStorage

        ssdb_storage = SsdbMiniDBStorage(command_config.ssdb_file)  # storage.py
    else:
        raise NotImplementedError(f'Database engine {database_engine} not implemented')

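    # Editorial note, not part of the original module: the engine chosen above comes either from the
    # --database-engine command line option or from the configuration keys read a few lines up
    # ('database' -> 'engine' and 'max_snapshots'). A minimal, assumed YAML layout would be:
    #     database:
    #       engine: sqlite3        # or 'textfiles', 'minidb', or a 'redis://...' / 'rediss://...' URI
    #       max_snapshots: 4
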
    # Set up the jobs file API
    jobs_storage = YamlJobsStorage(command_config.jobs_files)  # storage.py

    # Set up 'webchanges'
    urlwatcher = Urlwatch(command_config, config_storage, ssdb_storage, jobs_storage)  # main.py
    urlwatch_command = UrlwatchCommand(urlwatcher)  # command.py

    # Run 'webchanges', starting with processing the command line arguments
    urlwatch_command.run()

    # Remove the Playwright debug mode if it was set
    teardown_logger(command_config.verbose)


if __name__ == '__main__':
    main()