• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 1a598cc4-2e8c-4b85-a3fe-a27403ef5124

19 Dec 2024 12:00AM UTC coverage: 93.154% (+0.002%) from 93.152%
1a598cc4-2e8c-4b85-a3fe-a27403ef5124

push

circleci

snarfed
set up, but don't enable, ndb debug logging

for #1149

4409 of 4733 relevant lines covered (93.15%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.5
/common.py
1
"""Misc common utilities."""
2
import base64
1✔
3
from datetime import timedelta
1✔
4
import functools
1✔
5
import logging
1✔
6
import os
1✔
7
from pathlib import Path
1✔
8
import re
1✔
9
import threading
1✔
10
import urllib.parse
1✔
11
from urllib.parse import urljoin, urlparse
1✔
12

13
import cachetools
1✔
14
from Crypto.Util import number
1✔
15
from flask import abort, g, has_request_context, make_response, request
1✔
16
from google.cloud.error_reporting.util import build_flask_context
1✔
17
from google.cloud import ndb
1✔
18
from google.cloud.ndb.global_cache import _InProcessGlobalCache, MemcacheCache
1✔
19
from google.cloud.ndb.key import Key
1✔
20
from google.protobuf.timestamp_pb2 import Timestamp
1✔
21
from granary import as2
1✔
22
from oauth_dropins.webutil import util, webmention
1✔
23
from oauth_dropins.webutil.appengine_config import error_reporting_client, tasks_client
1✔
24
from oauth_dropins.webutil import appengine_info
1✔
25
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
26
from oauth_dropins.webutil import flask_util
1✔
27
from oauth_dropins.webutil.util import json_dumps
1✔
28
from negotiator import ContentNegotiator, AcceptParameters, ContentType
1✔
29
import pymemcache.client.base
1✔
30
from pymemcache.serde import PickleSerde
1✔
31
from pymemcache.test.utils import MockMemcacheClient
1✔
32

33
logger = logging.getLogger(__name__)

# Domain name regexp. Accepts hostname characters (a-z, 0-9, -) as well as
# arbitrary unicode (eg ☃.net), but rejects specific characters that often show
# up in webfinger addresses, AP handles, etc. (@, :)
# https://stackoverflow.com/questions/10306690/what-is-a-regular-expression-which-will-match-a-valid-domain-name-without-a-subd
#
# TODO: preprocess with domain2idna, then narrow this to just [a-z0-9-]
DOMAIN_RE = r'^([^/:;@?!\'.]+\.)+[^/:@_?!\'.]+$'

CONTENT_TYPE_HTML = 'text/html; charset=utf-8'

PRIMARY_DOMAIN = 'fed.brid.gy'
# "super"domain that the protocol-specific subdomains live under
SUPERDOMAIN = '.brid.gy'
# TODO: add a Flask route decorator version of util.canonicalize_domain, then
# use it to canonicalize most UI routes from these to fed.brid.gy.
# TODO: unify with models.PROTOCOLS
PROTOCOL_DOMAINS = (
    'ap.brid.gy',
    'atproto.brid.gy',
    'bsky.brid.gy',
    'web.brid.gy',
    'efake.brid.gy',
    'fa.brid.gy',
    'other.brid.gy',
)
OTHER_DOMAINS = (
    'bridgy-federated.appspot.com',
    'bridgy-federated.uc.r.appspot.com',
)
LOCAL_DOMAINS = (
    'localhost',
    'localhost:8080',
    'my.dev.com:8080',
)
# every domain we serve on: primary first, then protocol, other, and local
DOMAINS = (PRIMARY_DOMAIN, *PROTOCOL_DOMAINS, *OTHER_DOMAINS, *LOCAL_DOMAINS)
# TODO: unify with manual_opt_out
# TODO: unify with Bridgy's
DOMAIN_BLOCKLIST = (
    'bsky.social',
    'facebook.com',
    'fb.com',
    'instagram.com',
    'reddit.com',
    't.co',
    'tiktok.com',
    'twitter.com',
    'x.com',
)

SMTP_HOST = 'smtp.gmail.com'
SMTP_PORT = 587

# populated in models.reset_protocol_properties
SUBDOMAIN_BASE_URL_RE = None
ID_FIELDS = ('id', 'object', 'actor', 'author', 'inReplyTo', 'url')

CACHE_CONTROL = {'Cache-Control': 'public, max-age=3600'}  # 1 hour

USER_AGENT = 'Bridgy Fed (https://fed.brid.gy/)'
util.set_user_agent(USER_AGENT)

# https://cloud.google.com/appengine/docs/locations
TASKS_LOCATION = 'us-central1'
RUN_TASKS_INLINE = False  # overridden by unit tests

# for Protocol.REQUIRES_OLD_ACCOUNT, how old is old enough
OLD_ACCOUNT_AGE = timedelta(weeks=2)

# populated later in this file
NDB_CONTEXT_KWARGS = None

# https://github.com/memcached/memcached/wiki/Commands#standard-protocol
MEMCACHE_KEY_MAX_LEN = 250
107

108
# Memcache clients and the ndb global cache. Production connects to the
# memcache server at $MEMCACHE_HOST; debug mode and the local server use
# in-process stand-ins instead.
if not (appengine_info.DEBUG or appengine_info.LOCAL_SERVER):
    logger.info('Using production Memorystore memcache')
    # timeouts are in seconds
    memcache = pymemcache.client.base.PooledClient(
        os.environ['MEMCACHE_HOST'],
        timeout=10,
        connect_timeout=10,
        allow_unicode_keys=True,
    )
    pickle_memcache = pymemcache.client.base.PooledClient(
        os.environ['MEMCACHE_HOST'],
        timeout=10,
        connect_timeout=10,
        serde=PickleSerde(),
        allow_unicode_keys=True,
    )
    global_cache = MemcacheCache(memcache)
else:
    logger.info('Using in memory mock memcache')
    memcache = MockMemcacheClient(allow_unicode_keys=True)
    pickle_memcache = MockMemcacheClient(allow_unicode_keys=True, serde=PickleSerde())
    global_cache = _InProcessGlobalCache()

# content negotiator used by as2_request_type: HTML plus the two AS2 types
_negotiator = ContentNegotiator(acceptable=[
    AcceptParameters(ContentType(mime_type))
    for mime_type in (CONTENT_TYPE_HTML, as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD)
])
128

129

130
@functools.cache
def protocol_user_copy_ids():
    """Returns all copy ids for protocol bot users.

    Loads the :class:`web.Web` user for each domain in
    :attr:`PROTOCOL_DOMAINS` and collects the ``uri`` of each of their copies.
    Domains without a stored user are skipped. The result is computed once
    and then cached for the life of the process.

    Returns:
      tuple of str: copy URIs
    """
    # imported here instead of at module level (presumably to avoid a
    # circular import -- confirm)
    from web import Web

    bot_keys = (Web(id=domain).key for domain in PROTOCOL_DOMAINS)
    return tuple(copy.uri
                 for bot in ndb.get_multi(bot_keys) if bot
                 for copy in bot.copies)
141

142

143
def base64_to_long(x):
    """Decodes a URL-safe base64 value into a long integer.

    Originally from ``django_salmon.magicsigs``. Used in :meth:`User.public_pem`
    and :meth:`User.private_pem`.

    Args:
      x (str or bytes): URL-safe base64 encoded big-endian integer

    Returns:
      int
    """
    raw = base64.urlsafe_b64decode(x)
    return number.bytes_to_long(raw)
150

151

152
def long_to_base64(x):
    """Encodes a long integer as URL-safe base64.

    Originally from ``django_salmon.magicsigs``. Used in :meth:`User.get_or_create`.

    Args:
      x (int)

    Returns:
      bytes: URL-safe base64 encoding of the integer's bytes
    """
    raw = number.long_to_bytes(x)
    return base64.urlsafe_b64encode(raw)
158

159

160
def host_url(path_query=None):
    """Returns an absolute URL for a path and query on the current request's host.

    If the request's host is in (or under) one of :attr:`OTHER_DOMAINS`, or is
    one of :attr:`LOCAL_DOMAINS` while ``DEBUG`` is off, the URL is based on
    :attr:`PRIMARY_DOMAIN` instead.

    Args:
      path_query (str): optional path and query to join onto the base URL

    Returns:
      str: URL
    """
    base = request.host_url
    host = request.host

    on_other_domain = util.domain_or_parent_in(host, OTHER_DOMAINS)
    # second case: running locally against prod datastore
    if on_other_domain or (not DEBUG and host in LOCAL_DOMAINS):
        base = f'https://{PRIMARY_DOMAIN}'

    assert base
    return urljoin(base, path_query)
169

170

171
def error(err, status=400, exc_info=None, **kwargs):
    """Like :func:`oauth_dropins.webutil.flask_util.error`, but wraps body in JSON.

    Logs the message, then aborts the request with a ``{'error': ...}`` JSON
    response body.

    Args:
      err: error message or exception; converted with ``str``
      status (int): HTTP status code
      exc_info: passed through to the log call
      kwargs: passed through to :func:`flask.abort`
    """
    msg = str(err)
    logger.info(f'Returning {status}: {msg}', exc_info=exc_info)
    json_response = make_response({'error': msg}, status)
    abort(status, response=json_response, **kwargs)
176

177

178
def pretty_link(url, text=None, user=None, **kwargs):
    """Wrapper around :func:`oauth_dropins.webutil.util.pretty_link` that converts Mastodon user URLs to @-@ handles.

    If ``text`` isn't provided, URLs shaped like https://mastodon.social/@foo
    or https://mastodon.social/users/foo get the default text
    ``@foo@mastodon.social``.

    If ``user`` is provided and ``url`` is one of their web URLs, returns
    their user link instead.

    Args:
      url (str)
      text (str)
      user (models.User): current user
      kwargs: passed through to :func:`oauth_dropins.webutil.util.pretty_link`

    Returns:
      str: link HTML
    """
    if user and user.is_web_url(url):
        return user.user_link(handle=False, pictures=True)

    if text is None and (match := re.match(r'https?://([^/]+)/(@|users/)([^/]+)$', url)):
        domain, _, username = match.groups()
        text = f'@{username}@{domain}'

    return util.pretty_link(url, text=text, **kwargs)
200

201

202
def content_type(resp):
    """Returns a :class:`requests.Response`'s Content-Type, without charset suffix.

    Everything from the first ``;`` on is dropped, and whitespace around the
    remaining media type is stripped, so ``text/html ; charset=utf-8`` yields
    ``text/html``.

    Args:
      resp (requests.Response): anything with a dict-like ``headers`` attribute

    Returns:
      str: media type, or None if the ``Content-Type`` header is missing or empty
    """
    header = resp.headers.get('Content-Type')
    if not header:
        return None

    return header.split(';')[0].strip()
207

208

209
def redirect_wrap(url, domain=None):
    """Returns a URL on our domain that redirects to this URL.

    ...to satisfy Mastodon's non-standard domain matching requirement. :(

    Empty URLs, and URLs that are already on one of :attr:`DOMAINS`, are
    returned unchanged.

    * https://github.com/snarfed/bridgy-fed/issues/16#issuecomment-424799599
    * https://github.com/tootsuite/mastodon/pull/6219#issuecomment-429142747

    Args:
      url (str)
      domain (str): optional Bridgy Fed domain to use. Must be in
        :attr:`DOMAINS`. Defaults to the current request's host, via
        :func:`host_url`.

    Returns:
      str: redirect url
    """
    if not url or util.domain_from_link(url) in DOMAINS:
        return url

    path = '/r/' + url
    if not domain:
        return host_url(path)

    assert domain in DOMAINS, (domain, url)
    return urljoin(f'https://{domain}/', path)
234

235

236
def subdomain_wrap(proto, path=None):
    """Returns the URL for a given path on this protocol's subdomain.

    Eg for the path ``foo/bar`` on ActivityPub, returns
    ``https://ap.brid.gy/foo/bar``.

    Args:
      proto (subclass of :class:`protocol.Protocol`): if falsy, or if its
        ``ABBREV`` is falsy, the ``fed`` subdomain is used
      path (str): optional path to join onto the subdomain's base URL

    Returns:
      str: URL
    """
    subdomain = 'fed'
    if proto and proto.ABBREV:
        subdomain = proto.ABBREV

    return urljoin(f'https://{subdomain}{SUPERDOMAIN}/', path)
250

251

252
def unwrap(val, field=None):
    """Removes our subdomain/redirect wrapping from a URL, if it's there.

    ``val`` may be a string, dict, or list. dicts and lists are unwrapped
    recursively. dict values are unwrapped with their key as ``field``; list
    elements are unwrapped without a field.

    Strings that aren't wrapped URLs, and values of any other type, are
    returned unchanged.

    Args:
      val (str or dict or list)
      field (str): optional field name for this value. For fields in
        :attr:`ID_FIELDS`, an unwrapped value that's a bare domain is
        returned as ``https://[domain]/``.

    Returns:
      str or dict or list: unwrapped value, same type as ``val``
    """
    if isinstance(val, dict):
        # TODO: clean up. https://github.com/snarfed/bridgy-fed/issues/967
        obj_id = val.get('id')
        is_protocol_bot = (
            isinstance(obj_id, str)
            and urlparse(obj_id).path.strip('/') in DOMAINS + ('',)
            and util.domain_from_link(obj_id) in DOMAINS)
        if is_protocol_bot:
            # protocol bot user, don't touch its URLs
            return {**val, 'id': unwrap(obj_id)}

        return {f: unwrap(v, field=f) for f, v in val.items()}

    if isinstance(val, list):
        return [unwrap(v) for v in val]

    if not isinstance(val, str):
        return val

    match = SUBDOMAIN_BASE_URL_RE.match(val)
    if not match:
        return val

    unwrapped = match.group('path')
    if field in ID_FIELDS and re.fullmatch(DOMAIN_RE, unwrapped):
        return f'https://{unwrapped}/'

    return unwrapped
290

291

292
def webmention_endpoint_cache_key(url):
    """Returns cache key for a cached webmention endpoint for a given URL.

    The key is the URL's domain (netloc). For home pages, ie when the path is
    empty or ``/``, `` /`` is appended, so that webmention endpoints for home
    pages are cached separately from other pages.
    https://github.com/snarfed/bridgy/issues/701

    Example: ``snarfed.org /``

    https://github.com/snarfed/bridgy-fed/issues/423

    Adapted from ``bridgy/util.py``.

    Args:
      url (str)

    Returns:
      str: cache key
    """
    parts = urllib.parse.urlparse(url)
    is_home_page = parts.path in ('', '/')
    key = parts.netloc + ' /' if is_home_page else parts.netloc

    logger.debug(f'wm cache key {key}')
    return key
313

314

315
@cachetools.cached(cachetools.TTLCache(50000, 60 * 60 * 2),  # 2h expiration
                   # cachetools calls the key function with the same
                   # (*args, **kwargs) as the wrapped function, so accept and
                   # ignore kwargs here; the cache key depends only on the URL.
                   key=lambda url, **_: webmention_endpoint_cache_key(url),
                   lock=threading.Lock())
def webmention_discover(url, **kwargs):
    """Thin caching wrapper around :func:`oauth_dropins.webutil.webmention.discover`.

    Results are cached in memory for two hours, keyed by
    :func:`webmention_endpoint_cache_key`, ie per domain, with home pages
    cached separately. ``kwargs`` don't affect the cache key.

    Args:
      url (str)
      kwargs: passed through to :func:`oauth_dropins.webutil.webmention.discover`

    Returns:
      return value of :func:`oauth_dropins.webutil.webmention.discover`
    """
    return webmention.discover(url, **kwargs)
321

322

323
def create_task(queue, delay=None, **params):
    """Adds a Cloud Tasks task.

    If ``RUN_TASKS_INLINE`` is set or we're running in a local server, runs
    the task handler inline, via the Flask test client, instead of creating a
    task.

    Args:
      queue (str): queue name
      delay (:class:`datetime.timedelta`): optional, used as task ETA (from now)
      params: form-encoded and included in the task request body. dict values
        are serialized to JSON first.

    Returns:
      flask.Response or (str, int): response from either running the task
      inline, if running in a local server, or the response from creating the
      task.
    """
    assert queue
    path = f'/queue/{queue}'

    # serialize dict values to JSON; everything else is passed through as is
    params = {name: json_dumps(val, sort_keys=True) if isinstance(val, dict) else val
              for name, val in params.items()}

    if RUN_TASKS_INLINE or appengine_info.LOCAL_SERVER:
        logger.info(f'Running task inline: {queue} {params}')
        from router import app
        client = app.test_client()
        return client.post(path, data=params,
                           headers={flask_util.CLOUD_TASKS_TASK_HEADER: 'x'})

    body = urllib.parse.urlencode(sorted(params.items())).encode()
    traceparent = request.headers.get('traceparent', '')
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        # propagate trace id
        # https://cloud.google.com/trace/docs/trace-context#http-requests
        # https://stackoverflow.com/a/71343735/186123
        'traceparent': traceparent,
    }
    task = {
        'app_engine_http_request': {
            'http_method': 'POST',
            'relative_uri': path,
            'body': body,
            'headers': headers,
        },
    }

    if delay:
        now = util.to_utc_timestamp(util.now())
        task['schedule_time'] = Timestamp(seconds=int(now + delay.total_seconds()))

    parent = tasks_client.queue_path(appengine_info.APP_ID, TASKS_LOCATION, queue)
    created = tasks_client.create_task(parent=parent, task=task)
    task_id = created.name.split("/")[-1]

    # the task's params are deliberately left out of this message to cut
    # logging costs
    # https://github.com/snarfed/bridgy-fed/issues/1149#issuecomment-2265861956
    msg = f'Added {queue} {task_id}'
    # only logged when the current request didn't carry a traceparent header
    if not traceparent:
        logger.info(msg)

    return msg, 202
386

387

388
def report_exception(**kwargs):
    """Reports the exception currently being handled.

    Shortcut for :func:`report_error` with no message and ``exception=True``.

    Args:
      kwargs: passed through to :func:`report_error`
    """
    return report_error(None, exception=True, **kwargs)
390

391

392
def report_error(msg, *, exception=False, **kwargs):
    """Reports an error to StackDriver Error Reporting.

    https://cloud.google.com/python/docs/reference/clouderrorreporting/latest/google.cloud.error_reporting.client.Client

    If ``DEBUG`` and ``exception`` are ``True``, re-raises the exception
    instead. If ``DEBUG`` is ``True`` and ``exception`` isn't, just logs the
    message.

    Failures while reporting are logged and swallowed.

    Duplicated in ``bridgy.util``.

    Args:
      msg (str): error message; unused if ``exception`` is ``True``
      exception (bool): whether to report the exception currently being
        handled instead of ``msg``
      kwargs: passed through to the error reporting client
    """
    if DEBUG:
        if exception:
            raise
        logger.error(msg)
        return

    http_context = None
    if has_request_context():
        http_context = build_flask_context(request)

    try:
        if not exception:
            logger.error(msg)
            error_reporting_client.report(
                msg, http_context=http_context, **kwargs)
        else:
            logger.error('', exc_info=True)
            error_reporting_client.report_exception(
                http_context=http_context, **kwargs)
    except BaseException:
        # best effort: reporting itself must never break the caller
        kwargs['exception'] = exception
        logger.warning(f'Failed to report error! {kwargs}', exc_info=exception)
422

423

424
def cache_policy(key):
    """In memory ndb cache, only DID docs right now.

    Only ``Object`` entities whose key name starts with ``did:`` are cached.

    https://github.com/snarfed/bridgy-fed/issues/1149#issuecomment-2261383697

    Args:
      key (google.cloud.datastore.key.Key or google.cloud.ndb.key.Key):
        see https://github.com/googleapis/python-ndb/issues/987

    Returns:
      bool: whether to cache this object. For a falsy ``key``, ``key`` itself
      is returned instead of ``False``.
    """
    # ndb keys wrap an internal google.cloud.datastore.key.Key; use that
    # https://github.com/googleapis/python-ndb/issues/987
    ds_key = key._key if isinstance(key, Key) else key

    return ds_key and ds_key.kind == 'Object' and ds_key.name.startswith('did:')
442

443

444
def global_cache_policy(key):
    """Global cache policy: every entity is cached, regardless of key.

    Args:
      key: unused

    Returns:
      bool: always ``True``
    """
    return True
446

447

448
# Matches ids that look like profile objects: paths ending in /user/[x] or
# /users/[x], ATProto profile records, DIDs, and https home page URLs.
PROFILE_ID_RE = re.compile(
    fr"""
      /users?/[^/]+$ |
      /app.bsky.actor.profile/self$ |
      ^did:[a-z0-9:.]+$ |
      ^https://{DOMAIN_RE[1:-1]}/?$
    """, re.VERBOSE)


def global_cache_timeout_policy(key):
    """Cache users and profile objects longer than other objects.

    ``ActivityPub``, ``ATProto``, ``Follower``, and ``MagicKey`` entities, and
    ``Object`` entities whose key name matches :attr:`PROFILE_ID_RE`, are
    cached for two hours. Everything else is cached for 30 minutes.

    Args:
      key (google.cloud.datastore.key.Key or google.cloud.ndb.key.Key):
        see https://github.com/googleapis/python-ndb/issues/987

    Returns:
      int: cache expiration for this object, in seconds
    """
    if isinstance(key, Key):
        # use internal google.cloud.datastore.key.Key
        # https://github.com/googleapis/python-ndb/issues/987
        key = key._key

    long_lived = key and (
        key.kind in ('ActivityPub', 'ATProto', 'Follower', 'MagicKey')
        or key.kind == 'Object' and PROFILE_ID_RE.search(key.name))

    ttl = timedelta(hours=2) if long_lived else timedelta(minutes=30)
    return int(ttl.total_seconds())
476

477

478
# ndb context settings, built from the policies and global cache defined above
NDB_CONTEXT_KWARGS = dict(
    # limited context-local cache. avoid full one due to this bug:
    # https://github.com/googleapis/python-ndb/issues/888
    cache_policy=cache_policy,
    global_cache=global_cache,
    global_cache_policy=global_cache_policy,
    global_cache_timeout_policy=global_cache_timeout_policy,
)
486

487

488
def memcache_key(key):
    """Preprocesses a memcache key.

    Escapes spaces as ``%20``, encodes to UTF-8, and truncates the result to
    at most :attr:`MEMCACHE_KEY_MAX_LEN` *bytes*. The limit is applied after
    escaping and encoding because both can make the key longer: each space
    becomes three bytes, and non-ASCII characters encode to multiple bytes.

    https://pymemcache.readthedocs.io/en/latest/apidoc/pymemcache.client.base.html
    https://github.com/memcached/memcached/wiki/Commands#standard-protocol

    Args:
      key (str)

    Returns:
      bytes: UTF-8 encoded key, at most :attr:`MEMCACHE_KEY_MAX_LEN` bytes
    """
    encoded = key.replace(' ', '%20').encode()
    if len(encoded) <= MEMCACHE_KEY_MAX_LEN:
        return encoded

    # The cut may land inside a multi-byte character; drop the partial
    # character so that the key is still valid UTF-8.
    truncated = encoded[:MEMCACHE_KEY_MAX_LEN]
    return truncated.decode(errors='ignore').encode()
498

499

500
def memcache_memoize_key(fn, *args, **kwargs):
    """Returns the memcache key for a memoized call to ``fn``.

    Built from the function's name, a fixed ``2`` marker (presumably a cache
    version -- confirm), and the ``repr`` of the positional and keyword
    arguments, then passed through :func:`memcache_key`.

    Args:
      fn (callable)
      args: positional args of the call
      kwargs: keyword args of the call

    Returns:
      bytes: memcache key
    """
    return memcache_key(f'{fn.__name__}-2-{args!r}-{kwargs!r}')
502

503

504
# Stored in memcache in place of None, since a None from memcache's get means
# a cache miss. Empty tuple.
NONE = ()


def memcache_memoize(expire=None, key=None):
    """Memoize function decorator that stores the cached value in memcache.

    ``None`` return values are cached too, stored as the :attr:`NONE`
    sentinel. Note that this means a function that returns an empty tuple
    gets ``None`` back on cache hits.

    Args:
      expire (timedelta): optional, expiration. If not provided, cached
        values don't expire.
      key (callable): function that takes the function's (*args, **kwargs) and
        returns the cache key to use
    """
    # memcache clients expect an integer number of seconds here, not None;
    # 0 means no expiration
    expire = int(expire.total_seconds()) if expire else 0

    def decorator(fn):
        @functools.wraps(fn)
        def wrapped(*args, **kwargs):
            if key:
                cache_key = memcache_memoize_key(fn, key(*args, **kwargs))
            else:
                cache_key = memcache_memoize_key(fn, *args, **kwargs)

            val = pickle_memcache.get(cache_key)
            if val is not None:
                # cache hit; translate the sentinel back to None
                return None if val == NONE else val

            # cache miss; call the function and store its result
            val = fn(*args, **kwargs)
            pickle_memcache.set(cache_key, NONE if val is None else val, expire=expire)
            return val

        return wrapped

    return decorator
538

539

540
def as2_request_type():
    """If this request has conneg (ie the ``Accept`` header) for AS2, returns its type.

    Specifically, returns either
    ``application/ld+json; profile="https://www.w3.org/ns/activitystreams"`` or
    ``application/activity+json``.

    If the current request's conneg isn't asking for AS2, or it has no
    ``Accept`` header, returns None.

    https://www.w3.org/TR/activitypub/#retrieving-objects
    https://snarfed.org/2023-03-24_49619-2

    Returns:
      str: AS2 content type, or None
    """
    accept = request.headers.get('Accept')
    if not accept:
        return None

    try:
        negotiated = _negotiator.negotiate(accept)
    except ValueError:
        # work around https://github.com/CottageLabs/negotiator/issues/6
        return None

    if not negotiated:
        return None

    accept_type = str(negotiated.content_type)
    if accept_type == as2.CONTENT_TYPE:
        return as2.CONTENT_TYPE
    if accept_type in (as2.CONTENT_TYPE_LD, as2.CONTENT_TYPE_LD_PROFILE):
        return as2.CONTENT_TYPE_LD_PROFILE

    logger.debug(f'Conneg resolved {accept_type} for Accept: {accept}')
    return None
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc