• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

idlesign / torrt / 25475794615

07 May 2026 04:16AM UTC coverage: 60.464% (-7.1%) from 67.593%
25475794615

Pull #95

github

web-flow
Merge dccd57d45 into 5d89e3750
Pull Request #95: Fix tests on Windows

2 of 3 new or added lines in 1 file covered. (66.67%)

119 existing lines in 7 files now uncovered.

1017 of 1682 relevant lines covered (60.46%)

1.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.1
/src/torrt/base_tracker.py
1
import re
3✔
2
from datetime import datetime
3✔
3
from http.cookiejar import CookieJar
3✔
4
from itertools import chain
3✔
5
from locale import LC_ALL, getlocale, setlocale
3✔
6
from typing import ClassVar
3✔
7
from urllib.parse import parse_qs, urljoin, urlparse
3✔
8

9
from .exceptions import TorrtTrackerException
3✔
10
from .utils import (
3✔
11
    BeautifulSoup,
12
    HttpClient,
13
    PageData,
14
    Response,
15
    TorrentData,
16
    TrackerClassesRegistry,
17
    TrackerObjectsRegistry,
18
    WithSettings,
19
    encode_value,
20
    make_soup,
21
    parse_torrent,
22
)
23

24

25
class BaseTracker(WithSettings):
    """Base torrent tracker handler class offering helper methods for its ancestors."""

    config_entry_name: str = 'trackers'

    active: bool = True
    """Tracker support flag. Can be used to skip initialization for currently unavailable trackers."""

    alias: str | None = None
    """Tracker alias. Usually main tracker domain. See also `mirrors` attribute."""

    mirrors: ClassVar[list[str]] = []
    """List of mirror domain names."""

    encoding: str | None = None
    """Tracker html page encoding (cp1251 or other)."""

    test_urls: ClassVar[list[str]] = []
    """Page URLs for automatic tests of torrent extraction."""

    raise_on_error_response: bool = False
    """Whether to raise an exception on request errors.
    Primary use is debug and testsuite.

    """

    request_timeout: float | int = 4  # seconds, used when probing mirrors

    def __init__(self, *, cookies: dict[str, str] | None = None, query_string: str = '', **kwargs):
        """Initializes the tracker handler.

        :param cookies: cookies to use for HTTP requests
        :param query_string: query string (GET parameters) to append to URLs

        """
        # Set lazily by pick_mirror() on the first request.
        self.mirror_picked: str | None = None

        if cookies is None:
            cookies = {}

        self.cookies = cookies
        self.query_string = query_string

        # Cached data for currently processed torrent.
        self._torrent_page_url: str = ''
        self._torrent_page: BeautifulSoup | None = None

        self.client = HttpClient(
            silence_exceptions=not self.raise_on_error_response,
            dump_fname_tpl=f'%(ts)s_{self.__class__.__name__}.html'
        )

        super().__init__()

    def __init_subclass__(cls, **kwargs):
        # Auto-register every concrete (aliased) and active tracker class on definition.
        if cls.alias and cls.active:
            TrackerClassesRegistry.add(cls)

    def get_query_string(self) -> str:
        """Returns a query string to be appended to request URLs."""
        return self.query_string

    def encode_value(self, value: str) -> bytes | str:
        """Encodes a value using the tracker page encoding (see `encoding` attribute).

        :param value: value to encode

        """
        return encode_value(value, encoding=self.encoding)

    def pick_mirror(self, url: str) -> str:
        """Probes mirrors (domains) one by one and chooses one which is available to use.

        The result is cached in `mirror_picked`; subsequent calls return it without probing.
        Falls back to the original URL domain if no mirror responds.

        :param url: URL to take the original domain and scheme from

        """
        mirror_picked = self.mirror_picked

        if mirror_picked is None:
            self.log_debug('Picking a mirror ...')

            original_domain = self.extract_domain(url)
            mirror_picked = original_domain

            for mirror_domain in self.mirrors:
                mirror_url = f'{self.extract_scheme(url)}://{mirror_domain}'

                self.log_debug(f'Probing mirror: `{mirror_url}` ...')

                response = self.client.request(
                    mirror_url,
                    timeout=self.request_timeout,
                    silence_exceptions=True,
                )

                # The startswith() check filters out mirrors redirecting elsewhere.
                if response and response.url.startswith(mirror_url):
                    mirror_picked = mirror_domain
                    break

            self.mirror_picked = mirror_picked

        return mirror_picked

    def get_mirrored_url(self, url: str) -> str:
        """Returns a mirrored URL for a given one.

        Assumes pick_mirror() has already been called, so `mirror_picked` is set.

        :param url: URL to replace the domain in

        """
        mirror_picked = self.mirror_picked
        original_domain = self.extract_domain(url)
        url_mirror = url.replace(original_domain, mirror_picked)
        return url_mirror

    def register(self):
        """Adds this object into TrackerObjectsRegistry."""

        TrackerObjectsRegistry.add(self)

    @classmethod
    def can_handle(cls, string: str) -> bool:
        """Returns boolean whether this tracker can handle torrent from string.

        Matches by substring search of the alias or any mirror domain.

        :param string: String, describing torrent. E.g. URL from torrent comment.

        """
        for domain in chain([cls.alias], cls.mirrors):
            if domain in string:
                return True
        return False

    @classmethod
    def extract_scheme(cls, url: str) -> str:
        """Extracts scheme (e.g. http, https) from a given URL.

        :param url:

        """
        return urlparse(url).scheme

    @classmethod
    def extract_domain(cls, url: str) -> str:
        """Extracts domain (netloc) from a given URL.

        :param url:

        """
        return urlparse(url).netloc

    def get_response(
            self,
            url: str,
            *,
            form_data: dict | None = None,
            allow_redirects: bool = True,
            referer: str = '',
            cookies: dict | CookieJar | None = None,
            query_string: str = '',
            as_soup: bool = False

    ) -> Response | BeautifulSoup | None:
        """Returns an HTTP resource object from given URL.

        If a dictionary is passed in `form_data` POST HTTP method
        would be used to pass data to resource (even if that dictionary is empty).

        The URL is rewritten to use the picked mirror (see pick_mirror()) before the request.

        :param url: URL to get data from

        :param form_data: data for POST

        :param allow_redirects: whether to follow server redirects

        :param referer: data to put into Referer header

        :param cookies: cookies to use

        :param query_string:  query string (GET parameters) to add to URL

        :param as_soup: whether to return BeautifulSoup object instead of Requests response

        """
        if query_string:

            # Append using '&' if the URL already carries a query string.
            delim = '?'

            if '?' in url:
                delim = '&'

            url = f'{url}{delim}{query_string}'

        self.pick_mirror(url)

        url = self.get_mirrored_url(url)

        result = self.client.request(
            url=url,
            data=form_data,
            referer=referer,
            allow_redirects=allow_redirects,
            cookies=cookies,
        )

        if result is not None and as_soup:
            result = self.make_page_soup(result.text)

        return result

    @classmethod
    def make_page_soup(cls, html: str) -> BeautifulSoup:
        """Returns BeautifulSoup object from a html.

        :param html:

        """
        return make_soup(html)

    @classmethod
    def find_links(cls, url: str, page_soup: BeautifulSoup, *, definite: str = '') -> str | None | list[str]:
        """Returns a list with hyperlinks found in supplied page_soup
        or a definite link.

        Relative links are expanded against `url` (see expand_link()).

        :param url: page URL
        :param page_soup: page soup
        :param definite: regular expression to match link; when given, a single
            expanded link (or None) is returned instead of a list

        """
        if not page_soup:
            return None if definite else []

        if definite:
            link = page_soup.find(href=re.compile(definite))

            if link:
                return cls.expand_link(url, link.get('href'))

            return link

        else:
            links = []

            for link in page_soup.find_all('a'):
                href = link.get('href')

                if href:
                    links.append(cls.expand_link(url, href))

            return links

    @classmethod
    def expand_link(cls, base_url: str, link: str) -> str:
        """Expands a given relative link using base URL if required.

        :param base_url:
        :param link: absolute or relative link

        """
        if not link.startswith('http'):
            link = urljoin(base_url, link)

        return link

    def test_configuration(self) -> bool:
        """This should implement a configuration test, e.g. make test login and report success."""
        return True

    def get_torrent(self, url: str, *, last_updated: datetime | None = None) -> TorrentData | None:
        """This method should be implemented in torrent tracker handler class
        and must return .torrent file contents.

        :param url: URL to download torrent file from
        :param last_updated: torrent last updated datetime

        """
        raise NotImplementedError  # pragma: nocover

    def extract_page_data(self) -> PageData:
        """Gathers data (title, cover, update date) from the cached torrent page."""
        data = PageData(
            title=self.extract_page_title(),
            cover=self.extract_page_cover(),
            date_updated=self.extract_page_date_updated()
        )
        return data

    def extract_page_title(self) -> str:
        """Returns the <title> text of the cached torrent page, or '' if unavailable."""
        page = self._torrent_page

        if not page:
            return ''

        # getattr() handles pages without a <title> tag (select_one() -> None).
        return getattr(page.select_one('title'), 'text', '')

    def extract_page_cover(self) -> str:
        """Returns a cover image URL. Subclasses may override; no-op by default."""
        return ''

    def extract_page_date_updated(self) -> datetime | None:
        """Returns torrent update date. Subclasses may override; no-op by default."""
        return None

    def parse_datetime(self, dt_str: str, fmt: str, *, locale: str = '') -> datetime | None:
        """Parses a datetime string, optionally under the given locale.

        Returns None if the string does not match the format.
        NOTE: temporarily switches the process-wide locale; the previous
        locale is always restored.

        :param dt_str: datetime string to parse
        :param fmt: strptime() format
        :param locale: locale name to parse under (e.g. for localized month names)

        """
        old_locale = getlocale()

        if locale:
            setlocale(LC_ALL, (locale, 'UTF-8'))

        try:
            try:
                return datetime.strptime(dt_str, fmt)  # noqa: DTZ007

            except ValueError:
                return None
        finally:
            setlocale(LC_ALL, old_locale)

    def get_torrent_page(self, url: str, *, drop_cache: bool = False) -> BeautifulSoup | None:
        """Get torrent page as soup for further data extraction.

        May return None if the request fails (see get_response()).

        :param url:
        :param drop_cache: Do not use cached version if any.

        """
        torrent_page = self._torrent_page

        # Cache is only valid for the same URL.
        if url != self._torrent_page_url:
            drop_cache = True

        if drop_cache or not torrent_page:
            torrent_page = self.get_response(
                url,
                referer=url,
                cookies=self.cookies,
                query_string=self.get_query_string(),
                as_soup=True
            )
            self._torrent_page = torrent_page
            self._torrent_page_url = url

        return torrent_page

355

356
class GenericTracker(BaseTracker):
    """Generic torrent tracker handler class implementing most common tracker handling methods."""

    def get_id_from_link(self, url: str) -> str:
        """Returns forum thread identifier from full thread URL.

        Expects an URL of the `...?param=<id>` form.

        :param url:

        """
        return url.split('=')[1]

    def get_torrent(self, url: str, *, last_updated: datetime | None = None) -> TorrentData | None:
        """This is the main method which returns torrent file contents
        of file located at URL.

        Returns None when the download link cannot be found, the torrent is
        already up to date, the download fails, or the file cannot be parsed.

        :param url: URL to find and get torrent from
        :param last_updated: torrent last updated datetime

        """
        download_link = self.get_download_link(url)

        if not download_link:
            self.log_error(f'Cannot find torrent file download link at {url}')
            return None

        page_data = self.extract_page_data()

        self.log_debug(f'Torrent download link found: {download_link}')

        date_updated = page_data.date_updated

        # Guard against trackers that expose no update date (date_updated is None):
        # the bare `last_updated >= page_data.date_updated` comparison raised
        # TypeError in that case. No date means we cannot prove it is up to date,
        # so we proceed with the download.
        if last_updated and date_updated and last_updated >= date_updated:
            self.log_debug('Skipped as up to date')
            return None

        torrent_contents = self.download_torrent(download_link, referer=url)

        if torrent_contents is None:
            self.log_debug(f'Torrent download from `{download_link}` has failed')
            return None

        parsed = parse_torrent(torrent_contents)

        if not parsed:
            return None

        return TorrentData(
            url=url,
            url_file=download_link,
            parsed=parsed,
            raw=torrent_contents,
            page=page_data,
        )

    def get_download_link(self, url: str) -> str:
        """Tries to find .torrent file download link on page and return it.

        :param url: URL to find a download link at.

        """
        raise NotImplementedError  # pragma: nocover

    def download_torrent(self, url: str, *, referer: str = '') -> bytes:
        """Returns .torrent file contents from the given URL.

        :param url: torrent file URL
        :param referer: Referer header value

        """
        raise NotImplementedError  # pragma: nocover

425

426
class GenericPublicTracker(GenericTracker):
    """Generic torrent tracker handler class implementing most common handling methods for public trackers."""

    login_required: bool = False

    def get_id_from_link(self, url: str) -> str:
        """Returns the torrent identifier taken from the last URL path segment.

        :param url:

        """
        return url.rsplit('/', 1)[-1]

    def download_torrent(self, url: str, *, referer: str = '') -> bytes | None:
        """Fetches .torrent file contents from the given URL.

        Returns None if the request fails.

        :param url: torrent file URL
        :param referer: Referer header value

        """
        self.log_debug(f'Downloading torrent file from {url} ...')
        # The Referer header serves as a check that the user himself visited the torrent's page ;)
        result = self.get_response(url, referer=referer)
        return getattr(result, 'content', None)
439

440

441
class GenericPrivateTracker(GenericPublicTracker):
    """Generic torrent tracker handler class implementing most common handling methods
    for private trackers (that require user registration).

    """

    login_required: bool = True

    login_url: str | None = None
    """URL of the page with the login form.
    This can include `%(domain)s` marker in place of a domain name when domain mirrors are used
    (see `mirrors` attribute of BaseTracker).

    """

    auth_cookie_name: str | None = None
    """Cookie name to verify that a log in was successful."""

    auth_qs_param_name: str | None = None
    """HTTP GET (query string) parameter name to verify that a log in was successful. Probably session ID."""

    def __init__(
            self,
            *,
            username: str = '',
            password: str = '',
            cookies: dict[str, str] | None = None,
            query_string: str = '',
            **kwargs
    ):
        """Initializes the private tracker handler.

        :param username: tracker account name
        :param password: tracker account password
        :param cookies: cookies to use for HTTP requests
        :param query_string: query string (e.g. session data) to append to URLs

        """
        super().__init__(
            cookies=cookies,
            query_string=query_string,
        )

        self.logged_in = False
        # Stores a number of login attempts to prevent recursion.
        self.login_counter = 0

        self.username = username
        self.password = password

    def get_encode_form_data(self, data: dict) -> dict:
        """Encode dictionary from get_login_form_data using Tracker page encoding.

        :param data: form data to encode

        """
        return {key: self.encode_value(value) for key, value in data.items()}

    def get_login_form_data(self, login: str, password: str) -> dict:
        """Should return a dictionary with data to be pushed to authorization form.

        :param login:
        :param password:

        """
        return {'username': login, 'password': password}

    def test_configuration(self) -> bool:
        """Verifies the configuration by attempting a login at the alias domain."""
        return self.login(self.alias)

    def login(self, domain: str) -> bool:
        """Implements tracker login procedure. Returns success bool.

        :param domain: domain name to substitute into `login_url`
        :raises TorrtTrackerException: if called while already logged in

        """
        # NOTE(review): assumes `login_url` is set on the subclass — confirm;
        # a None value would raise TypeError on the `%` formatting below.
        login_url = self.login_url % {'domain': domain}

        self.log_debug(f'Trying to login at {login_url} ...')

        if self.logged_in:
            raise TorrtTrackerException(f'Consecutive login attempt detected at `{self.__class__.__name__}`')

        if not self.username or not self.password:
            return False

        self.login_counter += 1

        # No recursion wanted.
        if self.login_counter > 1:
            return False

        allow_redirects = False  # Not to lose cookies on the redirect.

        if self.auth_qs_param_name:
            allow_redirects = True  # To be able to get Session ID from query string.

        form_data = self.get_login_form_data(self.username, self.password)
        form_data = self.get_encode_form_data(form_data)

        response = self.get_response(
            login_url,
            form_data=form_data,
            allow_redirects=allow_redirects,
            cookies=self.cookies
        )

        if not response:  # e.g. Connection aborted.
            return False

        # Login success checks.
        parsed_qs = parse_qs(urlparse(response.url).query)

        if self.auth_cookie_name in response.cookies or self.auth_qs_param_name in parsed_qs:

            self.logged_in = True

            # NOTE(review): looks like this expects `auth_qs_param_name` to be present
            # whenever the query string is non-empty — a KeyError is possible otherwise; verify.
            if parsed_qs:
                self.query_string = parsed_qs[self.auth_qs_param_name][0]

            self.cookies = response.cookies

            # Save auth info to config.
            self.save_settings()
            self.log_debug('Login is successful')

        else:
            self.log_warning('Login with given credentials failed')

        return self.logged_in

    def before_download(self, url: str):
        """Used to perform some required actions right before .torrent download.
        E.g.: to set a sentinel cookie that allows the download.

        :param url: torrent file URL

        """

    def get_query_string(self) -> str:
        """Returns an auth query string to be passed to get_response()
        for auth purposes.

        :return: auth string, e.g. sid=1234567890

        """
        query_string = super().get_query_string()

        if self.auth_qs_param_name:
            query_string = f'{self.auth_qs_param_name}={self.query_string}'

        return query_string

    def download_torrent(self, url: str, *, referer: str = '') -> bytes | None:
        """Returns .torrent file contents from the given URL, or None on failure.

        :param url: torrent file URL
        :param referer: Referer header value

        """
        self.log_debug(f'Downloading torrent file from {url} ...')

        self.before_download(url)

        response = self.get_response(
            url,
            cookies=self.cookies,
            query_string=self.get_query_string(),
            referer=referer
        )

        return getattr(response, 'content', None)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc