• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 6bb03b67-8218-41ab-96f6-f6409d110030

29 Nov 2025 06:35PM UTC coverage: 93.012% (+0.04%) from 92.969%
6bb03b67-8218-41ab-96f6-f6409d110030

push

circleci

snarfed
tweaks to id.normalize/translate_user_id, narrow somewhat to just user ids

TODO: what should they return if id is not a valid user id? add and use new is_user_id function?

7 of 7 new or added lines in 2 files covered. (100.0%)

40 existing lines in 4 files now uncovered.

6256 of 6726 relevant lines covered (93.01%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.33
/common.py
1
"""Misc common utilities."""
2
import base64
1✔
3
from datetime import timedelta
1✔
4
import functools
1✔
5
import logging
1✔
6
import os
1✔
7
from pathlib import Path
1✔
8
import re
1✔
9
import threading
1✔
10
import urllib.parse
1✔
11
from urllib.parse import urljoin, urlparse
1✔
12

13
from Crypto.Util import number
1✔
14
import flask
1✔
15
from flask import abort, g, has_request_context, make_response, redirect, request
1✔
16
from flask.views import View
1✔
17
from google.cloud.error_reporting.util import build_flask_context
1✔
18
from google.cloud import ndb
1✔
19
from google.cloud.ndb.key import Key
1✔
20
from google.protobuf.timestamp_pb2 import Timestamp
1✔
21
from granary import as2
1✔
22
from oauth_dropins.webutil import util, webmention
1✔
23
from oauth_dropins.webutil.appengine_config import error_reporting_client, tasks_client
1✔
24
from oauth_dropins.webutil.appengine_info import DEBUG, LOCAL_SERVER
1✔
25
from oauth_dropins.webutil import flask_util
1✔
26
from oauth_dropins.webutil.util import interpret_http_exception, json_dumps
1✔
27
from negotiator import ContentNegotiator, AcceptParameters, ContentType
1✔
28
import requests
1✔
29
import werkzeug.exceptions
1✔
30
from werkzeug.exceptions import HTTPException
1✔
31

32
import memcache
1✔
33

34
logger = logging.getLogger(__name__)
1✔
35

36
# allow hostname chars (a-z, 0-9, -), allow arbitrary unicode (eg ☃.net), don't
37
# allow specific chars that we'll often see in webfinger, AP handles, etc. (@, :)
38
# https://stackoverflow.com/questions/10306690/what-is-a-regular-expression-which-will-match-a-valid-domain-name-without-a-subd
39
#
40
# TODO: preprocess with domain2idna, then narrow this to just [a-z0-9-]
41
# TODO: unify with oauth_dropins.webutil.util.DOMAIN_RE?
42
DOMAIN_RE = r'^([^/:;@?!\'.]+\.)+[^/:@_?!\'.]+$'
1✔
43

44
CONTENT_TYPE_HTML = 'text/html; charset=utf-8'
1✔
45

46
GCP_PROJECT_ID = 'bridgy-federated'  # used in create_task
1✔
47

48
PRIMARY_DOMAIN = 'fed.brid.gy'
1✔
49
# protocol-specific subdomains are under this "super"domain
50
SUPERDOMAIN = '.brid.gy'
1✔
51
# TODO: add a Flask route decorator version of util.canonicalize_domain, then
52
# use it to canonicalize most UI routes from these to fed.brid.gy.
53
# TODO: unify with models.PROTOCOLS
54
PROTOCOL_DOMAINS = (
1✔
55
    'ap.brid.gy',
56
    'atproto.brid.gy',
57
    'bsky.brid.gy',
58
    'nostr.brid.gy',
59
    'web.brid.gy',
60
)
61
if DEBUG:
1✔
62
    PROTOCOL_DOMAINS += (
1✔
63
        'efake.brid.gy',
64
        'fa.brid.gy',
65
        'other.brid.gy',
66
    )
67
OTHER_DOMAINS = (
1✔
68
    'bridgy-federated.appspot.com',
69
    'bridgy-federated.uc.r.appspot.com',
70
)
71
LOCAL_DOMAINS = (
1✔
72
  'localhost',
73
  'localhost:8080',
74
  'my.dev.com:8080',
75
)
76
DOMAINS = (PRIMARY_DOMAIN,) + PROTOCOL_DOMAINS + OTHER_DOMAINS + LOCAL_DOMAINS
1✔
77
# TODO: unify with manual_opt_out
78
# TODO: unify with Bridgy's
79
DOMAIN_BLOCKLIST = (
1✔
80
    'bsky.social',
81
    'facebook.com',
82
    'fb.com',
83
    'instagram.com',
84
    'reddit.com',
85
    'rumble.com',  # serves infinite HTTP 307 redirects to GCP
86
    't.co',
87
    'tiktok.com',
88
    'twitter.com',
89
    'x.com',
90
    'youtu.be',
91
    'youtube.com',
92
)
93

94
# canaries that Seirdy inserts into their blocklists
95
# https://seirdy.one/posts/2023/05/02/fediverse-blocklists/#important-modifications-before-importing
96
DOMAIN_BLOCKLIST_CANARIES = (
1✔
97
    '000delete.this.line.if.you.have.read.the.documentation.on.seirdy.one',
98
    'canary.tier1.example.com',
99
    'canary.tier0.example.com',
100
    'canary.fedinuke.example.com',
101
)
102

103
SMTP_HOST = 'smtp.gmail.com'
1✔
104
SMTP_PORT = 587
1✔
105

106
# populated in models.reset_protocol_properties
107
SUBDOMAIN_BASE_URL_RE = None
1✔
108
ID_FIELDS = ('id', 'object', 'actor', 'author', 'inReplyTo', 'url')
1✔
109

110
CACHE_CONTROL = {'Cache-Control': 'public, max-age=3600'}  # 1 hour
1✔
111
CACHE_CONTROL_VARY_ACCEPT = {**CACHE_CONTROL, 'Vary': 'Accept'}
1✔
112

113
NDB_MEMCACHE_TIMEOUT = timedelta(hours=2)
1✔
114

115
USER_AGENT = 'Bridgy Fed (https://fed.brid.gy/)'
1✔
116
util.set_user_agent(USER_AGENT)
1✔
117

118
# https://cloud.google.com/appengine/docs/locations
119
TASKS_LOCATION = 'us-central1'
1✔
120
RUN_TASKS_INLINE = False  # overridden by unit tests
1✔
121

122
# for Protocol.REQUIRES_OLD_ACCOUNT, how old is old enough
123
OLD_ACCOUNT_AGE = timedelta(days=7)
1✔
124

125
# populated later in this file
126
NDB_CONTEXT_KWARGS = None
1✔
127

128
_negotiator = ContentNegotiator(acceptable=[
1✔
129
    AcceptParameters(ContentType(CONTENT_TYPE_HTML)),
130
    AcceptParameters(ContentType(as2.CONTENT_TYPE)),
131
    AcceptParameters(ContentType(as2.CONTENT_TYPE_LD)),
132
])
133

134
# User ids who opt into testing new "beta" features and changes before we roll them
135
# out to everyone.
136
with open(Path(os.path.dirname(__file__)) / 'beta_users.txt') as f:
1✔
137
  BETA_USER_IDS = util.load_file_lines(f)
1✔
138

139
class ErrorButDoNotRetryTask(HTTPException):
    """HTTP exception that uses the non-standard status code 299.

    NOTE(review): judging by the name, this presumably lets a task handler
    report a failure with a 2xx status so that the task queue doesn't retry
    it. Confirm against the callers.
    """
    code = 299
    description = 'ErrorButDoNotRetryTask'
1✔
142

143
# https://github.com/pallets/flask/issues/1837#issuecomment-304996942
144
werkzeug.exceptions.default_exceptions.setdefault(299, ErrorButDoNotRetryTask)
1✔
145
werkzeug.exceptions._aborter.mapping.setdefault(299, ErrorButDoNotRetryTask)
1✔
146

147

148
@functools.cache
def bot_user_ids():
    """Collects the ids of the protocol bot users, across protocols.

    Starts from :attr:`PROTOCOL_DOMAINS`. Then, for each protocol (other than
    the one labeled ``ui``) whose bot :class:`web.Web` user exists, adds the
    URIs of the bot's copies, plus the bot's id in every *other* protocol that
    has ``HAS_COPIES`` off and whose label isn't in the bot protocol's
    ``DEFAULT_ENABLED_PROTOCOLS``.

    The result is memoized by :func:`functools.cache`, so the lookups only run
    on the first call, and all callers share the same set object.

    Returns:
      set of str: bot user ids
    """
    # imported here, not at module level; presumably to avoid circular imports
    from models import PROTOCOLS
    from web import Web

    ids = set(PROTOCOL_DOMAINS)
    protos = {proto for proto in PROTOCOLS.values()
              if proto and proto.LABEL != 'ui'}

    for proto in protos:
        bot = Web.get_by_id(f'{proto.ABBREV}{SUPERDOMAIN}')
        if not bot:
            continue

        ids.update(copy.uri for copy in bot.copies)
        ids.update(
            bot.id_as(other) for other in protos
            if (proto != other and not other.HAS_COPIES
                and other.LABEL not in proto.DEFAULT_ENABLED_PROTOCOLS))

    return ids
1✔
170

171

172
def base64_to_long(x):
    """Decodes a URL-safe base64 value into the integer it represents.

    Originally from ``django_salmon.magicsigs``. Used in :meth:`User.public_pem`
    and :meth:`User.private_pem`.

    Args:
      x (str or bytes): URL-safe base64 encoded integer

    Returns:
      int
    """
    raw = base64.urlsafe_b64decode(x)
    return number.bytes_to_long(raw)
1✔
179

180

181
def long_to_base64(x):
    """Encodes an integer as URL-safe base64.

    Originally from ``django_salmon.magicsigs``. Used in :meth:`User.get_or_create`.

    Args:
      x (int)

    Returns:
      bytes: URL-safe base64 encoding of the integer's bytes
    """
    raw = number.long_to_bytes(x)
    return base64.urlsafe_b64encode(raw)
1✔
187

188

189
def host_url(path_query=None):
    """Returns the current request's base URL, optionally joined with a path.

    The base is ``https://`` + :attr:`PRIMARY_DOMAIN` when the request's host
    is matched against :attr:`OTHER_DOMAINS` by ``util.domain_or_parent_in``,
    or when ``DEBUG`` is off and the host is in :attr:`LOCAL_DOMAINS`.
    Otherwise it's the request's own host URL.

    Must be called inside a Flask request.

    Args:
      path_query (str): optional path and/or query to resolve against the base

    Returns:
      str: URL
    """
    host = request.host
    use_primary = (
        util.domain_or_parent_in(host, OTHER_DOMAINS)
        # when running locally against prod datastore
        or (not DEBUG and host in LOCAL_DOMAINS))
    base = f'https://{PRIMARY_DOMAIN}' if use_primary else request.host_url

    assert base
    return urljoin(base, path_query)
1✔
198

199

200
def error(err, status=400, exc_info=None, **kwargs):
    """Logs an error, then aborts the request with a JSON error body.

    Like :func:`oauth_dropins.webutil.flask_util.error`, but wraps body in JSON,
    ie ``{"error": "..."}``.

    Args:
      err: the error; converted with ``str()`` for the log line and the body
      status (int): HTTP status code to return
      exc_info: passed through to the logger as ``exc_info``
      kwargs: passed through to :func:`flask.abort`
    """
    message = str(err)
    logger.info(f'Returning {status}: {message}', exc_info=exc_info)

    body = {'error': message}
    abort(status, response=make_response(body, status), **kwargs)
1✔
205

206

207
def pretty_link(url, text=None, user=None, **kwargs):
    """Wrapper around :func:`oauth_dropins.webutil.util.pretty_link` that converts Mastodon user URLs to @-@ handles.

    Eg for URLs like https://mastodon.social/@foo and
    https://mastodon.social/users/foo, defaults text to ``@foo@mastodon.social``
    if it's not provided.

    If ``user`` is provided and ``url`` is one of their web URLs, returns the
    user's own link (:meth:`user_link`) instead.

    Args:
      url (str)
      text (str)
      user (models.User): current user
      kwargs: passed through to :func:`oauth_dropins.webutil.util.pretty_link`

    Returns:
      str: the rendered link
    """
    if user and user.is_web_url(url):
        return user.user_link(handle=False, pictures=True)

    if text is None:
        profile = re.match(r'https?://([^/]+)/(@|users/)([^/]+)$', url)
        if profile:
            domain, _, username = profile.groups()
            text = f'@{username}@{domain}'

    return util.pretty_link(url, text=text, **kwargs)
1✔
229

230

231
def content_type(resp):
    """Returns a :class:`requests.Response`'s Content-Type, without charset suffix.

    Everything from the first ``;`` on (charset, profile, etc) is dropped, and
    surrounding whitespace is stripped, so eg
    ``application/json ; charset=utf-8`` becomes ``application/json``.

    Args:
      resp (requests.Response): or any object with a dict-like ``headers``

    Returns:
      str or None: the bare media type, or None if the response has no
      ``Content-Type`` header or it's empty
    """
    header = resp.headers.get('Content-Type')
    if not header:
        return None

    # TODO: don't remove profile
    # right now, when we remove it, and don't use it to compare against eg
    # as2.CONTENT_TYPE_LD, we end up accepting non-AS2 JSON-LD, eg:
    # Content-Type: application/ld+json; charset=UTF-8
    return header.split(';')[0].strip()
1✔
240

241

242
def redirect_wrap(url, domain=None):
    """Returns a URL on our domain that redirects to this URL.

    ...to satisfy Mastodon's non-standard domain matching requirement. :(

    The wrapped URL is ``/r/`` + ``url``, on either ``domain`` or the current
    request's host (via :func:`host_url`). Empty URLs, and URLs whose domain is
    already in :attr:`DOMAINS`, are returned unchanged.

    Args:
      url (str)
      domain (str): optional Bridgy Fed domain to use. Must be in :attr:`DOMAINS`

    * https://github.com/snarfed/bridgy-fed/issues/16#issuecomment-424799599
    * https://github.com/tootsuite/mastodon/pull/6219#issuecomment-429142747

    Returns:
      str: redirect url
    """
    if not url:
        return url

    if util.domain_from_link(url) in DOMAINS:
        # already on one of our domains, nothing to wrap
        return url

    wrapped_path = '/r/' + url
    if not domain:
        return host_url(wrapped_path)

    assert domain in DOMAINS, (domain, url)
    return urljoin(f'https://{domain}/', wrapped_path)
1✔
267

268

269
def subdomain_wrap(proto, path=None):
    """Returns the URL for a given path on this protocol's subdomain.

    Eg for the path ``foo/bar`` on ActivityPub, returns
    ``https://ap.brid.gy/foo/bar``.

    Falls back to the ``fed`` subdomain if ``proto`` is falsy or has no
    ``ABBREV``.

    Args:
      proto (subclass of :class:`protocol.Protocol`)
      path (str): optional path to resolve against the subdomain's root URL

    Returns:
      str: URL
    """
    if proto and proto.ABBREV:
        subdomain = proto.ABBREV
    else:
        subdomain = 'fed'

    base = f'https://{subdomain}{SUPERDOMAIN}/'
    return urljoin(base, path)
1✔
283

284

285
def unwrap(val, field=None):
    """Removes our subdomain/redirect wrapping from a URL, if it's there.

    ``val`` may be a string, dict, or list. dicts and lists are unwrapped
    recursively. dict values are unwrapped with their key as ``field``; list
    elements are unwrapped without a field.

    Strings that aren't wrapped URLs are left unchanged. So are values of any
    other type.

    Args:
      val (str or dict or list)
      field (str): optional field name for this value. If it's in
        :attr:`ID_FIELDS` and the unwrapped value is a bare domain, the domain
        is returned as an ``https://`` URL with a trailing slash.

    Returns:
      str or dict or list: unwrapped value, same type as ``val``
    """
    if isinstance(val, dict):
        # TODO: clean up. https://github.com/snarfed/bridgy-fed/issues/967
        obj_id = val.get('id')
        is_bot_user = (
            isinstance(obj_id, str)
            and urlparse(obj_id).path.strip('/') in DOMAINS + ('',)
            and util.domain_from_link(obj_id) in DOMAINS)
        if is_bot_user:
            # protocol bot user, don't touch its URLs
            return {**val, 'id': unwrap(obj_id)}

        return {key: unwrap(elem, field=key) for key, elem in val.items()}

    if isinstance(val, list):
        return [unwrap(elem) for elem in val]

    if isinstance(val, str):
        wrapped = SUBDOMAIN_BASE_URL_RE.match(val)
        if wrapped:
            inner = wrapped.group('path')
            if field in ID_FIELDS and re.fullmatch(DOMAIN_RE, inner):
                return f'https://{inner}/'
            return inner

    return val
1✔
322

323

324
def create_task(queue, app_id=GCP_PROJECT_ID, delay=None, app=None, **params):
    """Adds a Cloud Tasks task.

    If running in a local server, or if ``RUN_TASKS_INLINE`` is set, runs the
    task handler inline instead of creating a task.

    If the ``authed_as`` param is set, the task's ETA starts from
    ``memcache.task_eta`` for this queue and user; ``delay`` is then added on
    top. Tasks that end up scheduled more than a second from now have their
    ``received_at`` param dropped.

    Args:
      queue (str): queue name
      app_id (str): GCP project id of the queue
      delay (:class:`datetime.timedelta`): optional, used as task ETA (from now)
      app (flask.Flask): if not provided, defaults to ``router.app``
      params: form-encoded and included in the task request body. ``None``
        values are dropped; dict values are JSON-encoded.

    Returns:
      flask.Response or (str, int): response from either running the task
      inline, if running in a local server, or the response from creating the
      task.
    """
    assert queue
    path = f'/queue/{queue}'

    # params used to be included in the "Added X task ..." log message below.
    # removed to cut logging costs
    # https://github.com/snarfed/bridgy-fed/issues/1149#issuecomment-2265861956
    encoded = {}
    for name, val in params.items():
        if val is None:
            continue
        encoded[name] = (json_dumps(val, sort_keys=True)
                         if isinstance(val, dict) else val)
    params = encoded

    try:
        req_headers = request.headers
        authorization = req_headers.get('Authorization') or ''
        traceparent = req_headers.get('traceparent') or ''
    except RuntimeError:  # not currently in a request context
        authorization = traceparent = ''

    if RUN_TASKS_INLINE or LOCAL_SERVER:
        logger.info(f'Running task inline: {queue} {params}')
        if not app:
            from router import app
        inline_headers = {
            flask_util.CLOUD_TASKS_TASK_HEADER: 'inline',
            'Authorization': authorization,
        }
        return app.test_client().post(path, data=params, headers=inline_headers)

    # determine task ETA
    eta = None
    now = util.now()
    authed_as = params.get('authed_as')
    if authed_as:
        eta = memcache.task_eta(queue, authed_as)

    if delay:
        eta = (eta or now) + delay

    schedule_time = None
    delay_msg = 'now'
    if eta and eta > now + timedelta(seconds=1):
        schedule_time = Timestamp(seconds=int(eta.timestamp()))
        # we use the received_at param to measure and log our task processing delay.
        # skip that if we're deliberately rate limiting/delaying the task.
        params.pop('received_at', None)
        delay_msg = f'in {eta - now}'

    # construct task object
    task_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        # same value as the request's Authorization header, or '' outside of a
        # request context; computed above
        'Authorization': authorization,
        # propagate trace id
        # https://cloud.google.com/trace/docs/trace-context#http-requests
        # https://stackoverflow.com/a/71343735/186123
        'traceparent': traceparent,
    }
    task = {
        'app_engine_http_request': {
            'http_method': 'POST',
            'relative_uri': path,
            'body': urllib.parse.urlencode(sorted(params.items())).encode(),
            'headers': task_headers,
        },
    }
    if schedule_time:
        task['schedule_time'] = schedule_time

    parent = tasks_client.queue_path(app_id, TASKS_LOCATION, queue)
    created = tasks_client.create_task(parent=parent, task=task)

    task_id = created.name.split("/")[-1]
    msg = f'Added {queue} {task_id} {delay_msg}'
    # NOTE(review): delay_msg is always a non-empty string here, so this
    # condition is always true and the message is always logged. confirm
    # whether that's intended.
    if delay_msg or not traceparent:
        logger.info(msg)

    return msg, 202
1✔
423

424

425
def report_exception(**kwargs):
    """Reports the exception currently being handled.

    Shortcut for :func:`report_error` with ``exception=True`` and no message.

    Args:
      kwargs: passed through to :func:`report_error`
    """
    return report_error(None, exception=True, **kwargs)
×
427

428

429
def report_error(msg, *, exception=False, **kwargs):
    """Reports an error to StackDriver Error Reporting.

    https://cloud.google.com/python/docs/reference/clouderrorreporting/latest/google.cloud.error_reporting.client.Client

    If ``DEBUG`` and ``exception`` are ``True``, re-raises the exception instead.
    Otherwise, if ``DEBUG`` or ``LOCAL_SERVER`` is on, only logs a warning.

    Failures while reporting are caught and logged, never raised.

    Duplicated in ``bridgy.util``.

    Args:
      msg (str): error message; unused for the report itself when ``exception``
        is True
      exception (bool): whether to report the exception currently being handled
        instead of ``msg``
      kwargs: passed through to the error reporting client
    """
    if DEBUG and exception:
        # bare raise: re-raises the exception the caller is currently handling
        raise

    if DEBUG or LOCAL_SERVER:
        # must be at warning level. logging with exception at error level or
        # above will report to prod error reporting
        logger.warning(msg, exc_info=exception)
        return

    http_context = None
    if has_request_context():
        http_context = build_flask_context(request)

    try:
        if not exception:
            logger.error(msg)
            error_reporting_client.report(
                msg, http_context=http_context, **kwargs)
        else:
            logger.error('', exc_info=True)
            error_reporting_client.report_exception(
                http_context=http_context, **kwargs)
    except BaseException:
        details = {**kwargs, 'exception': exception}
        logger.warning(f'Failed to report error! {details}', exc_info=exception)
×
461

462

463
def cache_policy(key):
    """In memory ndb cache.

    https://github.com/snarfed/bridgy-fed/issues/1149#issuecomment-2261383697

    Only cache kinds in memory that are immutable or largely harmless when changed.

    Keep an eye on this in case we start seeing problems due to this ndb bug
    where unstored in-memory modifications get returned by later gets:
    https://github.com/googleapis/python-ndb/issues/888

    Args:
      key (google.cloud.datastore.key.Key or google.cloud.ndb.key.Key):
        see https://github.com/googleapis/python-ndb/issues/987

    Returns:
      bool: whether to cache this object. If ``key`` is falsy, eg ``None``, it's
      returned as is instead of a bool.
    """
    # use internal google.cloud.datastore.key.Key
    # https://github.com/googleapis/python-ndb/issues/987
    datastore_key = key._key if isinstance(key, Key) else key

    if not datastore_key:
        # NOTE(review): this hands the falsy key itself, eg None, back to ndb,
        # not False. confirm how ndb interprets a None policy result.
        return datastore_key

    return datastore_key.kind in ('AtpBlock', 'AtpSequence', 'Object')
1✔
487

488

489
def global_cache_policy(key):
    """Global (memcache) ndb cache policy: cache every key.

    Args:
      key: ignored

    Returns:
      bool: always True
    """
    should_cache = True
    return should_cache
1✔
491

492

493
# Matches ids that look like user profiles. Alternatives, in order:
#   * paths ending in /user/[name] or /users/[name]
#   * paths ending in the app.bsky.actor.profile/self record
#   * DIDs
#   * https:// URLs that are just a domain (per DOMAIN_RE, without its ^ and $
#     anchors), with an optional trailing slash
#
# The dots in the app.bsky.actor.profile alternative are escaped so that they
# only match literal dots, not any character.
PROFILE_ID_RE = re.compile(
    fr"""
      /users?/[^/]+$ |
      /app\.bsky\.actor\.profile/self$ |
      ^did:[a-z0-9:.]+$ |
      ^https://{DOMAIN_RE[1:-1]}/?$
    """, re.VERBOSE)
500

501
def global_cache_timeout_policy(key):
    """Cache everything for :attr:`NDB_MEMCACHE_TIMEOUT`, regardless of key.

    Args:
      key (google.cloud.datastore.key.Key or google.cloud.ndb.key.Key):
        see https://github.com/googleapis/python-ndb/issues/987. Ignored.

    Returns:
      int: cache expiration for this object, in seconds
    """
    seconds = NDB_MEMCACHE_TIMEOUT.total_seconds()
    return int(seconds)
1✔
512

513

514
NDB_CONTEXT_KWARGS = {
1✔
515
    'cache_policy': cache_policy,
516
    'global_cache': memcache.global_cache,
517
    'global_cache_policy': global_cache_policy,
518
    'global_cache_timeout_policy': global_cache_timeout_policy,
519
}
520

521

522
def log_request():
    """Logs GET query params and POST form.

    Limits each value to 1000 chars. Must be called inside a Flask request.
    """
    lines = [f'{name} = {value[:1000]}'
             for name, value in request.values.items()]
    logger.info('Params:\n' + '\n'.join(lines))
528

529

530
class FlashErrors(View):
    """Wraps a Flask :class:`flask.views.View` and flashes errors.

    Mostly used with OAuth endpoints.
    """
    def dispatch_request(self):
        """Runs the wrapped view, converting some errors into flashed messages.

        On :class:`ValueError` or :class:`requests.RequestException`, logs a
        warning, flashes the error (the HTTP response body if
        ``interpret_http_exception`` finds one, otherwise the exception's
        string), and redirects to ``/login``.
        """
        try:
            return super().dispatch_request()
        except (ValueError, requests.RequestException) as err:
            view_name = self.__class__.__name__
            logger.warning(f'{view_name} error', exc_info=True)

            _, body = interpret_http_exception(err)
            message = body or str(err)
            flask_util.flash(util.linkify(message, pretty=True))
            return redirect('/login')
×
543

544

545
def render_template(template, **kwargs):
    """Wrapper around :func:`flask.render_template` that adds common variables.

    Makes ``isinstance``, ``request``, ``set``, and ``util`` available to every
    template.

    Args:
      template (str): template name
      kwargs: additional template variables. Must not reuse the names above.

    Returns:
      str: rendered template
    """
    common_vars = {
        'isinstance': isinstance,
        'request': request,
        'set': set,
        'util': util,
    }
    return flask.render_template(template, **common_vars, **kwargs)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc