• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mborsetti / webchanges / 13100552072

02 Feb 2025 04:30PM UTC coverage: 75.393% (-0.2%) from 75.597%
13100552072

push

github

mborsetti
Version 3.27.0b3

1712 of 2597 branches covered (65.92%)

Branch coverage included in aggregate %.

4517 of 5665 relevant lines covered (79.74%)

6.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.74
/webchanges/cli.py
1
#!/usr/bin/env python3
2

3
"""Module containing the entry point: the function main()."""
2✔
4

5
# See config module for the command line arguments.
6

7
# The code below is subject to the license contained in the LICENSE file, which is part of the source code.
8

9
from __future__ import annotations
8✔
10

11
import logging
8✔
12
import os
8✔
13
import platform
8✔
14
import shutil
8✔
15
import signal
8✔
16
import subprocess  # noqa: S404 Consider possible security implications associated with the subprocess module.
8✔
17
import sys
8✔
18
import warnings
8✔
19
from pathlib import Path, PurePath
8✔
20

21
import platformdirs
8✔
22

23
from webchanges import __copyright__, __docs_url__, __min_python_version__, __project_name__, __version__
8✔
24
from webchanges.config import CommandConfig
8✔
25
from webchanges.util import file_ownership_checks, get_new_version_number, import_module_from_source
8✔
26

27
# Restore the default action for SIGPIPE ("broken pipe") so that writing to a closed stdout pipe does not surface as
# an exception (see https://github.com/thp/urlwatch/issues/77).
# Feature-detect the signal instead of testing os.name: SIGPIPE does not exist on Windows, and checking for the
# attribute also keeps the import from failing on any other platform that lacks it.
if hasattr(signal, 'SIGPIPE'):
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # type: ignore[attr-defined]  # not defined in Windows

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
8✔
32

33

34
def python_version_warning() -> None:
    """Warn when running on the minimum Python version supported.

    If the (major, minor) version of the running interpreter equals ``__min_python_version__``, print a notice to
    stdout and issue a ``PendingDeprecationWarning`` carrying the same text. Otherwise do nothing.
    """
    if sys.version_info[0:2] != __min_python_version__:
        return

    current_minor_version = '.'.join(str(n) for n in sys.version_info[0:2])
    next_minor_version = f'{__min_python_version__[0]}.{__min_python_version__[1] + 1}'
    warning = (
        f'Support for Python {current_minor_version} will be ending three years from the date Python '
        f'{next_minor_version} was released'
    )
    print(f'WARNING: {warning}\n')
    # The warning has to go through warnings.warn() to be issued; instantiating the warning class on its own creates
    # an object that is immediately discarded. stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(warning, PendingDeprecationWarning, stacklevel=2)
2✔
46

47

48
def migrate_from_legacy(
    legacy_package: str,
    config_file: Path | None = None,
    jobs_file: Path | None = None,
    hooks_file: Path | None = None,
    ssdb_file: Path | None = None,
) -> None:
    """Copy the files of a legacy package (configuration, jobs, hooks, snapshot database) to their new locations.

    A legacy file is copied only when a destination was given for it, the legacy file exists, and the destination
    does not exist yet. The legacy files themselves are never deleted.

    :param legacy_package: The name of the legacy package to migrate (e.g. urlwatch).
    :param config_file: The new Path to the configuration file.
    :param jobs_file: The new Path to the jobs file.
    :param hooks_file: The new Path to the hooks file.
    :param ssdb_file: The new Path to the snapshot database file.
    """
    legacy_dir = Path.home() / f'.{legacy_package}'
    legacy_cache_dir = platformdirs.user_cache_path(legacy_package)

    # (legacy source, new destination) pairs; a destination that was not supplied is skipped below.
    migrations = (
        (legacy_dir / f'{legacy_package}.yaml', config_file),
        (legacy_dir / 'urls.yaml', jobs_file),
        (legacy_dir / 'hooks.py', hooks_file),
        (legacy_cache_dir / 'cache.db', ssdb_file),
    )

    for source, destination in migrations:
        if not destination:
            continue
        if not source.is_file() or destination.is_file():
            # Nothing to migrate, or the new file is already in place and must not be overwritten.
            continue
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(source, destination)
        logger.warning(f"Copied {legacy_package} '{source}' file to {__project_name__} '{destination}'.")
        logger.warning(f"You can safely delete '{source}'.")
×
79

80

81
def setup_logger(verbose: int | None = None, log_file: Path | None = None) -> None:
    """Set up the logger.

    :param verbose: The verbosity level (1 = INFO, 2 = DEBUG, 3 or higher = NOTSET). At level 2 and higher the
        ``DEBUG`` environment variable is also set, to turn on Playwright's verbose API logs. When falsy (``None`` or
        0), no log level is configured and ``sys.tracebacklimit`` is set to 0.
    :param log_file: If given, a ``logging.FileHandler`` for this file is the only handler configured, and a falsy
        ``verbose`` is raised to 1 (INFO).
    """
    if log_file:
        handlers: tuple[logging.Handler, ...] | None = (logging.FileHandler(log_file),)
        if not verbose:
            verbose = 1
    else:
        handlers = None

    log_level: str | None = None

    if verbose is not None:
        if verbose >= 2:
            # https://playwright.dev/python/docs/debug#verbose-api-logs
            os.environ['DEBUG'] = 'pw:api pytest -s'
            # The two levels are mutually exclusive: 3 or higher must select NOTSET and not fall through to DEBUG.
            log_level = 'NOTSET' if verbose >= 3 else 'DEBUG'
        elif verbose == 1:
            log_level = 'INFO'

    if not verbose:
        # Not verbose: hide tracebacks.
        sys.tracebacklimit = 0

    logging.basicConfig(
        format='%(asctime)s %(module)s[%(thread)s] %(levelname)s: %(message)s',
        level=log_level,
        handlers=handlers,
    )
    logger.info(f'{__project_name__}: {__version__} {__copyright__}')
    logger.info(
        f'{platform.python_implementation()}: {platform.python_version()} '
        f'{platform.python_build()} {platform.python_compiler()}'
    )
    logger.info(f'System: {platform.platform()}')
8✔
121

122

123
def teardown_logger(verbose: int | None = None) -> None:
    """Clean up after logging.

    :param verbose: The verbosity level; at 2 or higher the ``DEBUG`` environment variable (used for Playwright's
        verbose API logs) is removed if present. For ``None`` or lower levels nothing is done.
    """
    if verbose is None or verbose < 2:
        return

    # https://playwright.dev/python/docs/debug#verbose-api-logs
    os.environ.pop('DEBUG', None)
×
132

133

134
def _expand_glob_files(filename: Path, default_path: Path, ext: str | None = None) -> list[Path]:
    """Expand a (possibly wildcard) filename, looking both where specified and in the default directory.

    The candidates tried, in order, are the filename as given and, if ``ext`` is given and differs from the filename's
    suffix, the filename with suffix ``ext`` and the filename prefixed with ``jobs-`` with suffix ``ext``. Each
    candidate is globbed as given and then, unless it is absolute, inside ``default_path``. The first glob result
    containing at least one regular file is returned.

    :param filename: The filename (may contain glob wildcards).
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The sorted glob matches of the first successful lookup (the full glob result, so it may also contain
        entries that are not regular files), or a list with only the original filename if nothing was found.
    """
    search_filenames = [filename]

    # if ext is given, iterate both on raw filename and the filename with ext if different
    if ext and filename.suffix != ext:
        search_filenames.append(filename.with_suffix(ext))
        # also iterate on file pre-pended with 'jobs-'
        search_filenames.append(filename.with_stem(f'jobs-{filename.stem}').with_suffix(ext))

    for file in search_filenames:
        # try as given
        # https://stackoverflow.com/questions/56311703/globbing-absolute-paths-with-pathlib
        anchor = file.anchor
        # sorted so that the result does not depend on the order in which the file system lists entries
        file_list = sorted(Path(anchor).glob(str(file.relative_to(anchor))))
        if any(f.is_file() for f in file_list):
            return file_list

        # not an absolute path: also look in the default directory. (Comparing the parent of a relative path with
        # the absolute Path.cwd() can never match, so testing for a non-absolute path is all that is needed here.)
        if not file.is_absolute():
            file_list = sorted(default_path.glob(str(file)))
            if any(f.is_file() for f in file_list):
                return file_list

    # no matches found
    return [filename]


def locate_glob_files(filenames: list[Path], default_path: Path, ext: str | None = None) -> list[Path]:
    """Expand a list of (possibly wildcard) filenames into the files they refer to.

    Each filename is searched for both as specified and in the default directory, then with the 'ext' extension (and
    a 'jobs-' prefix) if defined.

    :param filenames: The filenames (may contain glob wildcards).
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The files found (or the original filename where nothing was found), without duplicates and in the
        order in which they were first encountered.
    """
    # dict keys keep insertion order, so this removes duplicates while keeping a deterministic order (a set would
    # yield the paths in an order that can differ from one run to the next)
    located: dict[Path, None] = {}
    for filename in filenames:
        located.update(dict.fromkeys(_expand_glob_files(filename, default_path, ext)))
    return list(located)
8✔
174

175

176
def locate_storage_file(filename: Path, default_path: Path, ext: str | None = None) -> Path:
    """Find a file either where specified or in the default directory, retrying with the 'ext' extension if defined.

    :param filename: The filename.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The first existing file found (as given, or inside the default directory, possibly with the extension
        applied), or the original filename if none exists.
    """
    # the raw filename first, then (only if ext is given and differs from the current suffix) the one with ext
    candidates = [filename]
    if ext and filename.suffix != ext:
        candidates.append(filename.with_suffix(ext))

    for candidate in candidates:
        if candidate.is_file():
            return candidate

        # only a bare filename (no directory component) is looked up in the default directory
        if candidate.parent != PurePath('.'):
            continue
        in_default_dir = default_path / candidate
        if in_default_dir.is_file():
            return in_default_dir

    # no matches found
    return filename
8✔
204

205

206
def locate_storage_files(filename_list: list[Path], default_path: Path, ext: str | None = None) -> set[Path]:
    """Run :func:`locate_storage_file` on every filename of a list.

    :param filename_list: The list of filenames.
    :param default_path: The default directory.
    :param ext: The extension, e.g. '.yaml', to add for searching if first scan fails.

    :returns: The set of filenames, either originals or ones with path where found and/or extension.
    """
    return {locate_storage_file(filename, default_path, ext) for filename in filename_list}
×
219

220

221
def first_run(command_config: CommandConfig) -> None:
    """Create the configuration file and the jobs file where they do not exist yet.

    A default configuration is written if the configuration file is missing. If none of the jobs files exists, the
    first one is created containing only a header comment. Each creation is reported on stdout.

    :param command_config: the CommandConfig containing the command line arguments selected.
    """
    config_file = command_config.config_file
    if not config_file.is_file():
        config_file.parent.mkdir(parents=True, exist_ok=True)
        # imported only when needed
        from webchanges.storage import YamlConfigStorage

        YamlConfigStorage.write_default_config(config_file)
        print(f'Created default config file at {config_file}')
        if not command_config.edit_config:
            print(f'> Edit it with {__project_name__} --edit-config')

    jobs_files = command_config.jobs_files
    if any(f.is_file() for f in jobs_files):
        return

    new_jobs_file = jobs_files[0]
    new_jobs_file.parent.mkdir(parents=True, exist_ok=True)
    new_jobs_file.write_text(f'# {__project_name__} jobs file. See {__docs_url__}en/stable/jobs.html\n')
    print(f'Created default jobs file at {new_jobs_file}')
    if not command_config.edit:
        print(f'> Edit it with {__project_name__} --edit')
8✔
242

243

244
def load_hooks(hooks_file: Path) -> None:
    """Import the hooks file as module 'hooks', unless it is missing or fails the file ownership checks.

    In both of those cases an ImportWarning is issued and nothing is imported.

    :param hooks_file: The Path to the hooks file.
    """
    if not hooks_file.is_file():
        warnings.warn(f'Hooks file not imported because {hooks_file} is not a file', ImportWarning)
        return

    ownership_errors = file_ownership_checks(hooks_file)
    if ownership_errors:
        reasons = ' and '.join(ownership_errors)
        warnings.warn(
            f'Hooks file {hooks_file} not imported because '
            f' {reasons}.\n'
            f'(see {__docs_url__}en/stable/hooks.html#important-note-for-hooks-file)',
            ImportWarning,
        )
        return

    logger.info(f'Importing hooks module from {hooks_file}')
    import_module_from_source('hooks', hooks_file)
    logger.info('Finished importing hooks module')
×
265

266

267
def handle_unitialized_actions(urlwatch_config: CommandConfig) -> None:
    """Run the CLI actions that can be handled before all classes etc. are initialized (and command.py is loaded),
    for speed purposes.

    If ``check_new`` or ``install_chrome`` is set, the corresponding action is run and the program exits with the
    action's return code; otherwise this function returns without doing anything.

    :param urlwatch_config: the CommandConfig containing the command line arguments selected.
    """

    def _exit(arg: str | int | None) -> None:
        """Log the exit code and terminate the program with it."""
        logger.info(f'Exiting with exit code {arg}')
        sys.exit(arg)

    def print_new_version() -> int:
        """Print whether a newer release is available on PyPI.

        :return: 0 if the check completed (whether or not a newer release was found), 1 on error.
        """
        print(f'{__project_name__} {__version__}.', end='')
        new_release = get_new_version_number(timeout=2)
        if new_release:
            print(
                f'\nNew release version {new_release} is available; we recommend updating using e.g. '
                f"'pip install -U {__project_name__}'."
            )
            return 0
        if new_release == '':
            # an empty string is reported as "already on the latest release"
            print(' You are running the latest release.')
            return 0
        # any other falsy value is reported as a failure to determine the latest release
        print(' Error contacting PyPI to determine the latest release.')
        return 1

    def playwright_install_chrome() -> int:  # pragma: no cover
        """
        Replicates playwright.___main__.main() function, which is called by the playwright executable, in order to
        install the browser executable.

        :return: Playwright's executable return code.
        """
        try:
            from playwright._impl._driver import compute_driver_executable
        except ImportError:  # pragma: no cover
            raise ImportError('Python package playwright is not installed; cannot install the Chrome browser') from None

        playwright_env = os.environ.copy()
        playwright_env['PW_CLI_TARGET_LANG'] = 'python'
        cmd = [str(compute_driver_executable()), 'install', 'chrome']
        logger.info(f"Running playwright CLI: {' '.join(cmd)}")
        completed_process = subprocess.run(cmd, env=playwright_env, capture_output=True, text=True)  # noqa: S603
        return_code = completed_process.returncode
        if return_code:
            # failure: show what the driver wrote to stderr and pass its return code on
            print(completed_process.stderr)
            return return_code
        if completed_process.stdout:
            logger.info(f'Success! Output of Playwright CLI: {completed_process.stdout}')
        return 0

    if urlwatch_config.check_new:
        _exit(print_new_version())

    if urlwatch_config.install_chrome:  # pragma: no cover
        _exit(playwright_install_chrome())
322

323

324
def main() -> None:  # pragma: no cover
    """The entry point run when __name__ == '__main__'.

    Holds the high-level logic: determines the default file locations, migrates legacy files, parses the command line,
    configures logging, loads the hooks and the configuration, instantiates the storage classes and runs the command.

    :raises NotImplementedError: If a `--database-engine` is specified that is not supported.
    """
    # Make sure that PendingDeprecationWarning are displayed from all modules (otherwise only those in __main__ are)
    warnings.filterwarnings('default', category=PendingDeprecationWarning)

    # Issue deprecation warning if running on minimum version supported
    python_version_warning()

    # Path where the config, jobs and hooks files are located
    if os.name == 'nt':
        config_path = platformdirs.user_documents_path().joinpath(__project_name__)
    else:
        config_path = platformdirs.user_config_path(__project_name__)  # typically ~/.config/{__project_name__}

    # Path where the snapshot database is located; typically ~/.local/share/{__project_name__} or
    # $XDG_DATA_HOME/{__project_name__} in linux, ~/Library/Application Support/webchanges in macOS and
    # %LOCALAPPDATA%\{__project_name__}\{__project_name__} in Windows
    data_path = platformdirs.user_data_path(__project_name__, __project_name__.capitalize())

    # Default config, jobs, hooks and ssdb (database) files
    default_config_file = config_path / 'config.yaml'
    default_jobs_file = config_path / 'jobs.yaml'
    default_hooks_file = config_path / 'hooks.py'
    default_ssdb_file = data_path / 'snapshots.db'

    # Check for and if found migrate snapshot database file from version <= 3.21, which was called cache.db and located
    # in user_cache_path
    migrate_from_legacy('webchanges', ssdb_file=default_ssdb_file)

    # Check for and if found migrate legacy (urlwatch) files
    migrate_from_legacy('urlwatch', default_config_file, default_jobs_file, default_hooks_file, default_ssdb_file)

    # Parse command line arguments
    cli_args = sys.argv[1:]
    command_config = CommandConfig(
        cli_args,
        config_path,
        default_config_file,
        default_jobs_file,
        default_hooks_file,
        default_ssdb_file,
    )

    # Set up the logger to verbose if needed
    setup_logger(command_config.verbose, command_config.log_file)

    # For speed, run these here
    handle_unitialized_actions(command_config)

    # Only now, after configuring logging, we can load other modules
    from webchanges.command import UrlwatchCommand
    from webchanges.main import Urlwatch
    from webchanges.storage import (
        SsdbDirStorage,
        SsdbRedisStorage,
        SsdbSQLite3Storage,
        SsdbStorage,
        YamlConfigStorage,
        YamlJobsStorage,
    )

    # Locate config, job and hooks files
    search_path = command_config.config_path
    command_config.config_file = locate_storage_file(command_config.config_file, search_path, '.yaml')
    command_config.jobs_files = locate_glob_files(command_config.jobs_files, search_path, '.yaml')
    command_config.hooks_files = locate_glob_files(command_config.hooks_files, search_path, '.py')

    # Check for first run: the default config file is the one in use and it does not exist
    if command_config.config_file == default_config_file and not Path(command_config.config_file).is_file():
        first_run(command_config)

    # Setup config file API
    config_storage = YamlConfigStorage(command_config.config_file)  # storage.py

    # load config (which for syntax checking requires hooks to be loaded too)
    for hooks_file in command_config.hooks_files or ():
        load_hooks(hooks_file)
    config_storage.load()

    # Setup database API; command line arguments take precedence over the configuration file
    # ("or 'sqlite3'" is not needed except for a mypy bug; same for the "or 4" below)
    database_engine = (
        command_config.database_engine or config_storage.config.get('database', {}).get('engine') or 'sqlite3'
    )
    max_snapshots = command_config.max_snapshots or config_storage.config.get('database', {}).get('max_snapshots') or 4

    ssdb_storage: SsdbStorage
    if database_engine == 'sqlite3':
        ssdb_storage = SsdbSQLite3Storage(command_config.ssdb_file, max_snapshots)  # storage.py
    elif str(command_config.ssdb_file).startswith(('redis://', 'rediss://')):
        ssdb_storage = SsdbRedisStorage(command_config.ssdb_file)  # storage.py
    elif database_engine.startswith('redis'):
        ssdb_storage = SsdbRedisStorage(database_engine)
    elif database_engine == 'textfiles':
        ssdb_storage = SsdbDirStorage(command_config.ssdb_file)  # storage.py
    elif database_engine == 'minidb':
        # legacy code imported only if needed (requires minidb, which is not a dependency)
        from webchanges.storage_minidb import SsdbMiniDBStorage

        ssdb_storage = SsdbMiniDBStorage(command_config.ssdb_file)  # storage.py
    else:
        raise NotImplementedError(f'Database engine {database_engine} not implemented')

    # Setup jobs file API
    jobs_storage = YamlJobsStorage(command_config.jobs_files)  # storage.py

    # Setup 'webchanges'
    urlwatcher = Urlwatch(command_config, config_storage, ssdb_storage, jobs_storage)  # main.py
    urlwatch_command = UrlwatchCommand(urlwatcher)  # command.py

    # Run 'webchanges', starting with processing command line arguments
    urlwatch_command.run()

    # Remove Playwright debug mode if there
    teardown_logger(command_config.verbose)


if __name__ == '__main__':
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc