• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

idlesign / torrt / 25483587268

07 May 2026 08:00AM UTC coverage: 67.598% (+0.005%) from 67.593%
25483587268

push

github

idlesign
fix lint

0 of 1 new or added line in 1 file covered. (0.0%)

1 existing line in 1 file now uncovered.

1137 of 1682 relevant lines covered (67.6%)

3.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.63
/src/torrt/base_tracker.py
1
import re
6✔
2
from datetime import datetime
6✔
3
from http.cookiejar import CookieJar
6✔
4
from itertools import chain
6✔
5
from locale import LC_ALL, getlocale, setlocale
6✔
6
from typing import ClassVar
6✔
7
from urllib.parse import parse_qs, urljoin, urlparse
6✔
8

9
from .exceptions import TorrtTrackerException
6✔
10
from .utils import (
6✔
11
    BeautifulSoup,
12
    HttpClient,
13
    PageData,
14
    Response,
15
    TorrentData,
16
    TrackerClassesRegistry,
17
    TrackerObjectsRegistry,
18
    WithSettings,
19
    encode_value,
20
    make_soup,
21
    parse_torrent,
22
)
23

24

25
class BaseTracker(WithSettings):
6✔
26
    """Base torrent tracker handler class offering helper methods for its ancestors."""
27

28
    config_entry_name: str = 'trackers'
6✔
29

30
    active: bool = True
6✔
31
    """Tracker support flag. Can be used to skip initialization for currently unavailable trackers."""
6✔
32

33
    alias: str = None
6✔
34
    """Tracker alias. Usually main tracker domain. See also `mirrors` attribute."""
6✔
35

36
    mirrors: ClassVar[list[str]] = []
6✔
37
    """List of mirror domain names."""
6✔
38

39
    encoding: str | None = None
6✔
40
    """Tracker html page encoding (cp1251 or other)."""
6✔
41

42
    test_urls: ClassVar[list[str]] = []
6✔
43
    """Page URLs for automatic tests of torrent extraction."""
6✔
44

45
    raise_on_error_response: bool = False
6✔
46
    """Whether to raise an exception on request errors.
6✔
47
    Primary use is debug and testsuite.
48
    
49
    """
50

51
    request_timeout: float | int = 4
6✔
52

53
    def __init__(self, *, cookies: dict[str, str] | None = None, query_string: str = '', **kwargs):
        """Prepare per-instance state and the HTTP client.

        :param cookies: cookies to use for requests; a fresh dict is created when omitted
        :param query_string: query string to use for requests
        :param kwargs: accepted for signature compatibility; not used by this initializer

        """
        # Filled in by pick_mirror() on the first request.
        self.mirror_picked: str | None = None

        self.cookies = {} if cookies is None else cookies
        self.query_string = query_string

        # Cached data for currently processed torrent.
        self._torrent_page_url: str = ''
        self._torrent_page: BeautifulSoup | None = None

        tracker_name = self.__class__.__name__
        self.client = HttpClient(
            silence_exceptions=not self.raise_on_error_response,
            dump_fname_tpl=f'%(ts)s_{tracker_name}.html',
        )

        super().__init__()
6✔
72

73
    def __init_subclass__(cls, **kwargs):
        """Register a tracker class in ``TrackerClassesRegistry`` upon its definition.

        Only classes having a non-empty ``alias`` and a truthy ``active`` flag are registered.

        :param kwargs: class keyword arguments, passed further up the MRO

        """
        # Keep the hook chain intact so that other classes in the hierarchy
        # get their chance to process the subclass (and its keyword arguments).
        super().__init_subclass__(**kwargs)

        if cls.alias and cls.active:
            TrackerClassesRegistry.add(cls)
6✔
76

77
    def get_query_string(self) -> str:
6✔
78
        return self.query_string
6✔
79

80
    def encode_value(self, value: str) -> bytes | str:
        """Encode a value using the tracker `encoding` attribute.

        Delegates to the module-level `encode_value` helper.

        :param value: value to encode

        """
        page_encoding = self.encoding
        return encode_value(value, encoding=page_encoding)
×
87

88
    def pick_mirror(self, url: str) -> str:
6✔
89
        """Probes mirrors (domains) one by one and chooses one whick is available to use.
90

91
        :param url:
92

93
        """
94
        mirror_picked = self.mirror_picked
6✔
95

96
        if mirror_picked is None:
6✔
97
            self.log_debug('Picking a mirror ...')
6✔
98

99
            original_domain = self.extract_domain(url)
6✔
100
            mirror_picked = original_domain
6✔
101

102
            for mirror_domain in self.mirrors:
6✔
103
                mirror_url = f'{self.extract_scheme(url)}://{mirror_domain}'
6✔
104

105
                self.log_debug(f'Probing mirror: `{mirror_url}` ...')
6✔
106

107
                response = self.client.request(
6✔
108
                    mirror_url,
109
                    timeout=self.request_timeout,
110
                    silence_exceptions=True,
111
                )
112

113
                if response and response.url.startswith(mirror_url):
6✔
114
                    mirror_picked = mirror_domain
3✔
115
                    break
3✔
116

117
            self.mirror_picked = mirror_picked
6✔
118

119
        return mirror_picked
6✔
120

121
    def get_mirrored_url(self, url: str) -> str:
6✔
122
        """Returns a mirrored URL for a given one.
123

124
        :param url:
125

126
        """
127
        mirror_picked = self.mirror_picked
6✔
128
        original_domain = self.extract_domain(url)
6✔
129
        url_mirror = url.replace(original_domain, mirror_picked)
6✔
130
        return url_mirror
6✔
131

132
    def register(self):
        """Put this tracker object into TrackerObjectsRegistry."""
        registry = TrackerObjectsRegistry
        registry.add(self)
6✔
136

137
    @classmethod
6✔
138
    def can_handle(cls, string: str) -> bool:
6✔
139
        """Returns boolean whether this tracker can handle torrent from string.
140

141
        :param string: String, describing torrent. E.g. URL from torrent comment.
142

143
        """
144
        for domain in chain([cls.alias], cls.mirrors):
6✔
145
            if domain in string:
6✔
146
                return True
6✔
147
        return False
6✔
148

149
    @classmethod
6✔
150
    def extract_scheme(cls, url: str) -> str:
6✔
151
        """Extracts scheme from a given URL.
152

153
        :param url:
154

155
        """
156
        return urlparse(url).scheme
6✔
157

158
    @classmethod
6✔
159
    def extract_domain(cls, url: str) -> str:
6✔
160
        """Extracts domain from a given URL.
161

162
        :param url:
163

164
        """
165
        return urlparse(url).netloc
6✔
166

167
    def get_response(
6✔
168
            self,
169
            url: str,
170
            *,
171
            form_data: dict | None = None,
172
            allow_redirects: bool = True,
173
            referer: str = '',
174
            cookies: dict | CookieJar | None = None,
175
            query_string: str = '',
176
            as_soup: bool = False
177

178
    ) -> Response | BeautifulSoup | None:
179
        """Returns an HTTP resource object from given URL.
180

181
        If a dictionary is passed in `form_data` POST HTTP method
182
        would be used to pass data to resource (even if that dictionary is empty).
183

184
        :param url: URL to get data from
185

186
        :param form_data: data for POST
187

188
        :param allow_redirects: whether to follow server redirects
189

190
        :param referer: data to put into Referer header
191

192
        :param cookies: cookies to use
193

194
        :param query_string:  query string (GET parameters) to add to URL
195

196
        :param as_soup: whether to return BeautifulSoup object instead of Requests response
197

198
        """
199
        if query_string:
6✔
200

201
            delim = '?'
6✔
202

203
            if '?' in url:
6✔
204
                delim = '&'
6✔
205

206
            url = f'{url}{delim}{query_string}'
6✔
207

208
        self.pick_mirror(url)
6✔
209

210
        url = self.get_mirrored_url(url)
6✔
211

212
        result = self.client.request(
6✔
213
            url=url,
214
            data=form_data,
215
            referer=referer,
216
            allow_redirects=allow_redirects,
217
            cookies=cookies,
218
        )
219

220
        if result is not None and as_soup:
3✔
221
            result = self.make_page_soup(result.text)
3✔
222

223
        return result
3✔
224

225
    @classmethod
    def make_page_soup(cls, html: str) -> BeautifulSoup:
        """Build a BeautifulSoup object out of the given html.

        :param html: page source

        """
        soup = make_soup(html)
        return soup
3✔
233

234
    @classmethod
6✔
235
    def find_links(cls, url: str, page_soup: BeautifulSoup, *, definite: str = '') -> str | None | list[str]:
6✔
236
        """Returns a list with hyperlinks found in supplied page_soup
237
        or a definite link.
238

239
        :param url: page URL
240
        :param page_soup: page soup
241
        :param definite: regular expression to match link
242

243
        """
244
        if not page_soup:
3✔
245
            return None if definite else []
×
246

247
        if definite:
3✔
248
            link = page_soup.find(href=re.compile(definite))
3✔
249

250
            if link:
3✔
251
                return cls.expand_link(url, link.get('href'))
3✔
252

253
            return link
×
254

255
        else:
256
            links = []
×
257

258
            for link in page_soup.find_all('a'):
×
259
                href = link.get('href')
×
260

261
                if href:
×
262
                    links.append(cls.expand_link(url, href))
×
263

264
            return links
×
265

266
    @classmethod
6✔
267
    def expand_link(cls, base_url: str, link: str) -> str:
6✔
268
        """Expands a given relative link using base URL if required.
269

270
        :param base_url:
271
        :param link: absolute or relative link
272

273
        """
274
        if not link.startswith('http'):
3✔
275
            link = urljoin(base_url, link)
3✔
276

277
        return link
3✔
278

279
    def test_configuration(self) -> bool:
6✔
280
        """This should implement a configuration test, e.g. make test login and report success."""
281
        return True
6✔
282

283
    def get_torrent(self, url: str, *, last_updated: datetime | None = None) -> TorrentData | None:
6✔
284
        """This method should be implemented in torrent tracker handler class
285
        and must return .torrent file contents.
286

287
        :param url: URL to download torrent file from
288
        :param last_updated: torrent last updated datetime
289

290
        """
291
        raise NotImplementedError  # pragma: nocover
292

293
    def extract_page_data(self) -> PageData:
        """Gather title, cover and update date of the current torrent page into PageData."""
        title = self.extract_page_title()
        cover = self.extract_page_cover()
        date_updated = self.extract_page_date_updated()

        return PageData(title=title, cover=cover, date_updated=date_updated)
6✔
300

301
    def extract_page_title(self) -> str:
6✔
302
        page = self._torrent_page
6✔
303

304
        if not page:
6✔
305
            return ''
6✔
306

307
        return getattr(page.select_one('title'), 'text', '')
3✔
308

309
    def extract_page_cover(self) -> str:
6✔
310
        return ''
6✔
311

312
    def extract_page_date_updated(self) -> datetime | None:
6✔
313
        return None
6✔
314

315
    def parse_datetime(self, dt_str: str, fmt: str, *, locale: str = ''):
6✔
316
        old_locale = getlocale()
3✔
317

318
        if locale:
3✔
319
            setlocale(LC_ALL, (locale, 'UTF-8'))
3✔
320

321
        try:
3✔
322
            try:
3✔
323
                return datetime.strptime(dt_str, fmt)  # noqa: DTZ007
3✔
324

325
            except ValueError:
×
326
                return None
×
327
        finally:
328
            setlocale(LC_ALL, old_locale)
3✔
329

330
    def get_torrent_page(self, url: str, *, drop_cache: bool = False) -> BeautifulSoup:
6✔
331
        """Get torrent page as soup for further data extraction.
332

333
        :param url:
334
        :param drop_cache: Do not use cached version if any.
335

336
        """
337
        torrent_page = self._torrent_page
6✔
338

339
        if url != self._torrent_page_url:
6✔
340
            drop_cache = True
6✔
341

342
        if drop_cache or not torrent_page:
6✔
343
            torrent_page = self.get_response(
6✔
344
                url,
345
                referer=url,
346
                cookies=self.cookies,
347
                query_string=self.get_query_string(),
348
                as_soup=True
349
            )
350
            self._torrent_page = torrent_page
3✔
351
            self._torrent_page_url = url
3✔
352

353
        return torrent_page
3✔
354

355

356
class GenericTracker(BaseTracker):
6✔
357
    """Generic torrent tracker handler class implementing most common tracker handling methods."""
358

359
    def get_id_from_link(self, url: str) -> str:
6✔
360
        """Returns forum thread identifier from full thread URL.
361

362
        :param url:
363

364
        """
365
        return url.split('=')[1]
×
366

367
    def get_torrent(self, url: str, *, last_updated: datetime | None = None) -> TorrentData | None:
6✔
368
        """This is the main method which returns torrent file contents
369
        of file located at URL.
370

371
        :param url: URL to find and get torrent from
372
        :param last_updated: torrent last updated datetime
373

374
        """
375
        download_link = self.get_download_link(url)
6✔
376

377
        if not download_link:
6✔
378
            self.log_error(f'Cannot find torrent file download link at {url}')
×
379
            return None
×
380

381
        page_data = self.extract_page_data()
6✔
382

383
        self.log_debug(f'Torrent download link found: {download_link}')
6✔
384

385
        if last_updated and last_updated >= page_data.date_updated:
6✔
386
            self.log_debug('Skipped as up to date')
×
387
            return None
×
388
        else:
389
            torrent_contents = self.download_torrent(download_link, referer=url)
6✔
390

391
        if torrent_contents is None:
3✔
392
            self.log_debug(f'Torrent download from `{download_link}` has failed')
×
393
            return None
×
394

395
        parsed = parse_torrent(torrent_contents)
3✔
396

397
        if not parsed:
3✔
398
            return None
×
399

400
        return TorrentData(
3✔
401
            url=url,
402
            url_file=download_link,
403
            parsed=parsed,
404
            raw=torrent_contents,
405
            page=page_data,
406
        )
407

408
    def get_download_link(self, url: str) -> str:
6✔
409
        """Tries to find .torrent file download link on page and return it.
410

411
        :param url: URL to find a download link at.
412

413
        """
414
        raise NotImplementedError  # pragma: nocover
415

416
    def download_torrent(self, url: str, *, referer: str = '') -> bytes:
6✔
417
        """Returns .torrent file contents from the given URL.
418

419
        :param url: torrent file URL
420
        :param referer: Referer header value
421

422
        """
423
        raise NotImplementedError  # pragma: nocover
424

425

426
class GenericPublicTracker(GenericTracker):
6✔
427
    """Generic torrent tracker handler class implementing most common handling methods for public trackers."""
428

429
    login_required: bool = False
6✔
430

431
    def get_id_from_link(self, url: str) -> str:
6✔
NEW
432
        return url.rsplit('/', maxsplit=1)[-1]
×
433

434
    def download_torrent(self, url: str, *, referer: str = '') -> bytes | None:
6✔
435
        self.log_debug(f'Downloading torrent file from {url} ...')
6✔
436
        # That was a check that user himself visited torrent's page ;)
437
        response = self.get_response(url, referer=referer)
6✔
438
        return getattr(response, 'content', None)
3✔
439

440

441
class GenericPrivateTracker(GenericPublicTracker):
6✔
442
    """Generic torrent tracker handler class implementing most common handling methods
443
    for private trackers (that require user registration).
444

445
    """
446

447
    login_required: bool = True
6✔
448

449
    login_url: str = None
6✔
450
    """URL where with login form.
6✔
451
    This can include `%(domain)s` marker in place of a domain name when domain mirrors are used
452
    (see `mirrors` attribute of BaseTracker).
453

454
    """
455

456
    auth_cookie_name: str = None
6✔
457
    """Cookie name to verify that a log in was successful."""
6✔
458

459
    auth_qs_param_name: str = None
6✔
460
    """HTTP GET (query string) parameter name to verify that a log in was successful. Probably session ID."""
6✔
461

462
    def __init__(
            self,
            *,
            username: str = '',
            password: str = '',
            cookies: dict[str, str] | None = None,
            query_string: str = '',
            **kwargs
    ):
        """Set up tracker state including user credentials.

        :param username: tracker account login
        :param password: tracker account password
        :param cookies: cookies to use for requests
        :param query_string: query string to use for requests
        :param kwargs: accepted for signature compatibility; not passed further

        """
        super().__init__(cookies=cookies, query_string=query_string)

        self.logged_in = False

        # Stores a number of login attempts to prevent recursion.
        self.login_counter = 0

        self.username, self.password = username, password
6✔
483

484
    def get_encode_form_data(self, data: dict) -> dict:
6✔
485
        """Encode dictionary from get_login_form_data using Tracker page encoding.
486

487
        :param dict data:
488

489
        """
490
        return {key: self.encode_value(value) for key, value in data.items()}
×
491

492
    def get_login_form_data(self, login: str, password: str) -> dict:
6✔
493
        """Should return a dictionary with data to be pushed to authorization form.
494

495
        :param login:
496
        :param password:
497

498
        """
499
        return {'username': login, 'password': password}
×
500

501
    def test_configuration(self) -> bool:
6✔
502
        return self.login(self.alias)
×
503

504
    def login(self, domain: str) -> bool:
6✔
505
        """Implements tracker login procedure. Returns success bool."""
506

507
        login_url = self.login_url % {'domain': domain}
×
508

509
        self.log_debug(f'Trying to login at {login_url} ...')
×
510

511
        if self.logged_in:
×
512
            raise TorrtTrackerException(f'Consecutive login attempt detected at `{self.__class__.__name__}`')
×
513

514
        if not self.username or not self.password:
×
515
            return False
×
516

517
        self.login_counter += 1
×
518

519
        # No recursion wanted.
520
        if self.login_counter > 1:
×
521
            return False
×
522

523
        allow_redirects = False  # Not to lose cookies on the redirect.
×
524

525
        if self.auth_qs_param_name:
×
526
            allow_redirects = True  # To be able to get Session ID from query string.
×
527

528
        form_data = self.get_login_form_data(self.username, self.password)
×
529
        form_data = self.get_encode_form_data(form_data)
×
530

531
        response = self.get_response(
×
532
            login_url,
533
            form_data=form_data,
534
            allow_redirects=allow_redirects,
535
            cookies=self.cookies
536
        )
537

538
        if not response:  # e.g. Connection aborted.
×
539
            return False
×
540

541
        # Login success checks.
542
        parsed_qs = parse_qs(urlparse(response.url).query)
×
543

544
        if self.auth_cookie_name in response.cookies or self.auth_qs_param_name in parsed_qs:
×
545

546
            self.logged_in = True
×
547

548
            if parsed_qs:
×
549
                self.query_string = parsed_qs[self.auth_qs_param_name][0]
×
550

551
            self.cookies = response.cookies
×
552

553
            # Save auth info to config.
554
            self.save_settings()
×
555
            self.log_debug('Login is successful')
×
556

557
        else:
558
            self.log_warning('Login with given credentials failed')
×
559

560
        return self.logged_in
×
561

562
    def before_download(self, url: str):
6✔
563
        """Used to perform some required actions right before .torrent download.
564
        E.g.: to set a sentinel cookie that allows the download.
565

566
        :param url: torrent file URL
567

568
        """
569

570
    def get_query_string(self) -> str:
6✔
571
        """Returns an auth query string to be passed to get_response()
572
        for auth purposes.
573

574
        :return: auth string, e.g. sid=1234567890
575

576
        """
577
        query_string = super().get_query_string()
6✔
578

579
        if self.auth_qs_param_name:
6✔
580
            query_string = f'{self.auth_qs_param_name}={self.query_string}'
6✔
581

582
        return query_string
6✔
583

584
    def download_torrent(self, url: str, *, referer: str = '') -> bytes | None:
6✔
585
        self.log_debug(f'Downloading torrent file from {url} ...')
3✔
586

587
        self.before_download(url)
3✔
588

589
        response = self.get_response(
3✔
590
            url,
591
            cookies=self.cookies,
592
            query_string=self.get_query_string(),
593
            referer=referer
594
        )
595

596
        return getattr(response, 'content', None)
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc