• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 9f4e0d09-3936-4691-ba3a-bced0c027ffa

29 May 2026 10:38PM UTC coverage: 94.073% (+0.009%) from 94.064%
9f4e0d09-3936-4691-ba3a-bced0c027ffa

push

circleci

snarfed
Circle: only deploy if commit message contains [deploy]

7714 of 8200 relevant lines covered (94.07%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.83
/protocol.py
1
"""Base protocol class and common code."""
2
from bs4 import BeautifulSoup
1✔
3
import copy
1✔
4
from datetime import datetime, timedelta, timezone
1✔
5
import logging
1✔
6
import os
1✔
7
import re
1✔
8
from threading import Lock
1✔
9
from urllib.parse import urljoin, urlparse
1✔
10

11
from cachetools import cached, LRUCache
1✔
12
from flask import request
1✔
13
from google.cloud import ndb
1✔
14
from google.cloud.ndb import OR
1✔
15
from google.cloud.ndb.model import _entity_to_protobuf
1✔
16
from granary import as1, as2, source
1✔
17
from granary.source import HTML_ENTITY_RE, html_to_text
1✔
18
from pymemcache.exceptions import (
1✔
19
    MemcacheServerError,
20
    MemcacheUnexpectedCloseError,
21
    MemcacheUnknownError,
22
)
23
from requests import RequestException
1✔
24
from websockets.exceptions import InvalidStatus
1✔
25
from webutil.appengine_info import DEBUG
1✔
26
from webutil.flask_util import cloud_tasks_only
1✔
27
from webutil.models import MAX_ENTITY_SIZE
1✔
28
from webutil import util
1✔
29
from webutil.util import json_dumps, json_loads
1✔
30
import werkzeug.exceptions
1✔
31
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException
1✔
32

33
import common
1✔
34
from common import (
1✔
35
    ErrorButDoNotRetryTask,
36
    report_error,
37
)
38
from domains import (
1✔
39
    DOMAINS,
40
    LOCAL_DOMAINS,
41
    PRIMARY_DOMAIN,
42
    PROTOCOL_DOMAINS,
43
    SUPERDOMAIN,
44
)
45
import dms
1✔
46
from domains import DOMAIN_BLOCKLIST
1✔
47
import ids
1✔
48
import memcache
1✔
49
from models import (
1✔
50
    Follower,
51
    get_original_user_key,
52
    load_user,
53
    Object,
54
    PROTOCOLS,
55
    PROTOCOLS_BY_KIND,
56
    Target,
57
    User,
58
)
59
import notifications
1✔
60

61
OBJECT_REFRESH_AGE = timedelta(days=30)
DELETE_TASK_DELAY = timedelta(minutes=1)
CREATE_MAX_AGE = timedelta(weeks=2)
CREATE_MAX_AGE_EXEMPT_DOMAINS = (
    'alt.store',
)
# WARNING: keep this below the receive queue's min_backoff_seconds in queue.yaml!
MEMCACHE_LEASE_EXPIRATION = timedelta(seconds=25)
MEMCACHE_DOWN_TASK_DELAY = timedelta(minutes=5)
# WARNING: keep this in sync with queue.yaml's receive and webmention task_retry_limit!
TASK_RETRIES_RECEIVE = 4
# https://docs.cloud.google.com/tasks/docs/creating-appengine-handlers#reading-headers
TASK_RETRIES_HEADER = 'X-AppEngine-TaskRetryCount'

# require a follow for users on these domains before we deliver anything from
# them other than their profile.
#
# the LIMITED_DOMAINS environment variable (whitespace separated) takes
# precedence; the limited_domains file is only read when it's unset or empty.
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
                   or util.load_file_lines('limited_domains'))

# domains to allow non-public activities from
NON_PUBLIC_DOMAINS = (
    # bridged from twitter (X). bird.makeup, kilogram.makeup, etc federate
    # tweets as followers-only, but they're public on twitter itself
    '.makeup',
)

DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | {
    'accept',
    'reject',
    'stop-following',
    'undo',
}
# the parentheses matter: binary - binds tighter than |, so without them
# DONT_STORE_AS1_TYPES would only be removed from as1.VERBS_WITH_OBJECT, not
# from the actor and post types.
STORE_AS1_TYPES = ((as1.ACTOR_TYPES | as1.POST_TYPES | as1.VERBS_WITH_OBJECT)
                   - DONT_STORE_AS1_TYPES)

DONT_NOTIFY_TYPES = (
    'block',
)

logger = logging.getLogger(__name__)
1✔
101

102

103
def error(*args, status=299, **kwargs):
    """Calls :func:`common.error` with the HTTP status defaulted to 299.

    299 is used as the default so that the task isn't retried.

    Args:
      args: passed through to :func:`common.error`
      status (int): HTTP status code, defaults to 299
      kwargs: passed through to :func:`common.error`
    """
    kwargs['status'] = status
    return common.error(*args, **kwargs)
1✔
106

107

108
def activity_id_memcache_key(id):
    """Returns the memcache key for ``receive-{id}``.

    Args:
      id (str): activity id
    """
    name = f'receive-{id}'
    return memcache.key(name)
1✔
110

111

112
class Protocol:
1✔
113
    """Base protocol class. Not to be instantiated; classmethods only."""
114
    ABBREV = None
1✔
115
    'str: lower case abbreviation, used in URL paths'
1✔
116
    PHRASE = None
1✔
117
    'str: human-readable name or phrase. Used in phrases like ``Follow this person on {PHRASE}``'
1✔
118
    OTHER_LABELS = ()
1✔
119
    'sequence of str: label aliases'
1✔
120
    LOGO_EMOJI = ''
1✔
121
    'str: logo emoji, if any'
1✔
122
    LOGO_HTML = ''
1✔
123
    'str: logo ``<img>`` tag, if any'
1✔
124
    CONTENT_TYPE = None
1✔
125
    "str: MIME type of this protocol's native data format, appropriate for the ``Content-Type`` HTTP header."
1✔
126
    HAS_COPIES = False
1✔
127
    'bool: whether this protocol is push and needs us to proactively create "copy" users and objects, as opposed to pulling converted objects on demand'
1✔
128
    DEFAULT_TARGET = None
1✔
129
    'str: optional, the default target URI to send this protocol\'s activities to. May be used as the "shared" target. Often only set if ``HAS_COPIES`` is true.'
1✔
130
    REQUIRES_AVATAR = False
1✔
131
    "bool: whether accounts on this protocol are required to have a profile picture. If they don't, their ``User.status`` will be ``blocked``."
1✔
132
    REQUIRES_NAME = False
1✔
133
    "bool: whether accounts on this protocol are required to have a profile name that's different than their handle or id. If they don't, their ``User.status`` will be ``blocked``."
1✔
134
    REQUIRES_OLD_ACCOUNT = False
1✔
135
    "bool: whether accounts on this protocol are required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their profile includes creation date and it's not old enough, their ``User.status`` will be ``blocked``."
1✔
136
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
137
    'sequence of str: labels of other protocols that are automatically enabled for this protocol to bridge into'
1✔
138
    DEFAULT_SERVE_USER_PAGES = False
1✔
139
    "bool: whether to serve user pages for all of this protocol's users on fed.brid.gy. If ``False``, user pages will only be served for users who have explicitly opted in."
1✔
140
    SUPPORTED_AS1_TYPES = ()
1✔
141
    'sequence of str: AS1 objectTypes and verbs that this protocol supports receiving and sending'
1✔
142
    SUPPORTS_DMS = False
1✔
143
    'bool: whether this protocol can receive DMs (chat messages)'
1✔
144
    USES_OBJECT_FEED = False
1✔
145
    'bool: whether to store followers on this protocol in :attr:`Object.feed`.'
1✔
146
    HTML_PROFILES = False
1✔
147
    'bool: whether this protocol supports HTML in profile descriptions. If False, profile descriptions should be plain text.'
1✔
148
    SEND_REPLIES_TO_ORIG_POSTS_MENTIONS = False
1✔
149
    "bool: whether replies to this protocol should include the original post's mentions as delivery targets"
1✔
150
    BOTS_FOLLOW_BACK = False
1✔
151
    'bool: when a user on this protocol follows a bot user to enable bridging, does the bot follow them back?'
1✔
152
    HANDLES_PER_PAY_LEVEL_DOMAIN = None
1✔
153
    'int: how many users to allow with handles on the same pay-level domain. None for no limit.'
1✔
154
    RECEIVE_FILTERS = ()
1✔
155
    'tuple of callable: filter functions from filters.py to apply to incoming activities. Applied in order, so put the cheapest filters first.'
1✔
156
    RATE_LIMIT_TYPE = memcache.RateLimitType.LINEAR
1✔
157
    'Whether receive and send task rate limiting increases linearly or exponentially.'
1✔
158

159
    @classmethod
1✔
160
    @property
1✔
161
    def LABEL(cls):
1✔
162
        """str: human-readable lower case name of this protocol, eg ``'activitypub``"""
163
        return cls.__name__.lower()
1✔
164

165
    @staticmethod
    def for_request(fed=None):
        """Looks up the protocol that the current request is for.

        Uses the current request's hostname, via :meth:`for_bridgy_subdomain`.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request hostname is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        host = request.host
        return Protocol.for_bridgy_subdomain(host, fed=fed)
1✔
180

181
    @staticmethod
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Maps a brid.gy subdomain, or a URL on one, to its protocol.

        Args:
          domain_or_url (str): bare domain, or web URL whose domain is used
          fed (str or protocol.Protocol): protocol to return if the domain is
            the primary domain (``fed.brid.gy``) or a local domain. If it's a
            str, it's looked up in :const:`models.PROTOCOLS`.

        Returns:
          class: :class:`Protocol` subclass, or None if the domain is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        if util.is_web(domain_or_url):
            domain = util.domain_from_link(domain_or_url, minimize=False)
        else:
            domain = domain_or_url

        if domain == PRIMARY_DOMAIN or domain in LOCAL_DOMAINS:
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        if domain and domain.endswith(SUPERDOMAIN):
            return PROTOCOLS.get(domain.removesuffix(SUPERDOMAIN))

        return None
1✔
204

205
    @classmethod
1✔
206
    def owns_id(cls, id):
1✔
207
        """Returns whether this protocol owns the id, or None if it's unclear.
208

209
        To be implemented by subclasses.
210

211
        IDs are string identities that uniquely identify users or objects, and
212
        are intended primarily to be machine readable and usable. Compare to
213
        handles, which are human-chosen, human-meaningful, and often but not
214
        always unique.
215

216
        Some protocols' ids are more or less deterministic based on the id
217
        format, eg AT Protocol owns ``at://`` URIs and DIDs. Others, like
218
        http(s) URLs, could be owned by eg Web or ActivityPub.
219

220
        This should be a quick guess without expensive side effects, eg no
221
        external HTTP fetches to fetch the id itself or otherwise perform
222
        discovery.
223

224
        Returns False if the id's domain is in :const:`domains.DOMAIN_BLOCKLIST`.
225

226
        Args:
227
          id (str): user id or object id
228

229
        Returns:
230
          bool or None:
231
        """
232
        return False
1✔
233

234
    @classmethod
1✔
235
    def owns_handle(cls, handle, allow_internal=False):
1✔
236
        """Returns whether this protocol owns the handle, or None if it's unclear.
237

238
        To be implemented by subclasses.
239

240
        Handles are string identities that are human-chosen, human-meaningful,
241
        and often but not always unique. Compare to IDs, which uniquely identify
242
        users, and are intended primarily to be machine readable and usable.
243

244
        Some protocols' handles are more or less deterministic based on the id
245
        format, eg ActivityPub (technically WebFinger) handles are
246
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
247
        ActivityPub, AT Protocol, or others.
248

249
        This should be a quick guess without expensive side effects, eg no
250
        external HTTP fetches to fetch the id itself or otherwise perform
251
        discovery.
252

253
        Args:
254
          handle (str)
255
          allow_internal (bool): whether to return False for internal domains
256
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc
257

258
        Returns:
259
          bool or None
260
        """
261
        return False
1✔
262

263
    @classmethod
1✔
264
    def handle_to_id(cls, handle):
1✔
265
        """Converts a handle to an id.
266

267
        To be implemented by subclasses.
268

269
        May incur network requests, eg DNS queries or HTTP requests. Avoids
270
        blocked or opted out users.
271

272
        Args:
273
          handle (str)
274

275
        Returns:
276
          str: corresponding id, or None if the handle can't be found
277
        """
278
        raise NotImplementedError()
×
279

280
    @classmethod
1✔
281
    def authed_user_for_request(cls):
1✔
282
        """Returns the authenticated user id for the current request.
283

284

285
        Checks authentication on the current request, eg HTTP Signature for
286
        ActivityPub. To be implemented by subclasses.
287

288
        Returns:
289
          str: authenticated user id, or None if there is no authentication
290

291
        Raises:
292
          RuntimeError: if the request's authentication (eg signature) is
293
          invalid or otherwise can't be verified
294
        """
295
        return None
1✔
296

297
    @classmethod
    def key_for(cls, id, allow_opt_out=False):
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

        When called on :class:`Protocol` itself, the protocol is inferred with
        :meth:`for_id`. When called on a concrete subclass, that subclass is
        used as is.

        Args:
          id (str):
          allow_opt_out (bool): whether to allow users who are currently opted out

        Returns:
          google.cloud.ndb.Key: matching key, or None if the given id is not a
          valid :class:`User` id for this protocol.
        """
        if cls == Protocol:
            proto = Protocol.for_id(id)
            if not proto:
                return None
            return proto.key_for(id, allow_opt_out=allow_opt_out)

        # load the stored user, if any, so that we follow use_instead
        user = cls.get_by_id(id, allow_opt_out=True)
        if not user:
            # no stored user; build the key from a new, unsaved entity
            return cls(id=id).key

        if user.status and not allow_opt_out:
            return None

        return user.key
1✔
325

326
    @staticmethod
    def _for_id_memcache_key(id, remote=None):
        """Generates the memcache key for a :meth:`for_id` call.

        Args:
          id (str)
          remote (bool): the ``remote`` argument of the :meth:`for_id` call

        Returns:
          str or None: the id itself if its domain is in
          :const:`domains.PROTOCOL_DOMAINS`; otherwise the id's domain if
          ``remote`` is truthy and the id is a web URL; otherwise None
        """
        domain = util.domain_from_link(id)

        if domain in PROTOCOL_DOMAINS:
            return id

        if remote and util.is_web(id):
            return domain

        return None
1✔
341

342
    @cached(LRUCache(20000), lock=Lock())
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote=True: remote,
                      version=3)
    @staticmethod
    def for_id(id, remote=True):
        """Determines which protocol owns a given id.

        Tries progressively more expensive checks, in order: our own
        per-protocol subdomains, each protocol's :meth:`owns_id`, an existing
        stored :class:`Object`, and finally, only if ``remote`` is true, loading
        the id with each protocol that couldn't rule it out.

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.debug(f'Determining protocol for id {id}')
        if not id:
            return None

        # remove our synthetic id fragment, if any
        #
        # will this eventually cause false positives for other services that
        # include our full ids inside their own ids, non-URL-encoded? guess
        # we'll figure that out if/when it happens.
        id, _, _ = id.partition('#bridgy-fed-')
        if not id:
            return None

        # step 1: check for our per-protocol subdomains
        if util.is_web(id):
            try:
                parsed = urlparse(id)
            except ValueError as e:
                logger.info(f'urlparse ValueError: {e}')
                return None

            is_internal = parsed.path.startswith(ids.INTERNAL_PATH_PREFIX)
            by_subdomain = Protocol.for_bridgy_subdomain(id)
            if by_subdomain:
                # homepages, internal paths, and bot actor ids on our
                # subdomains fall through to the steps below
                excluded = (util.is_homepage(id) or is_internal
                            or id in ids.BOT_ACTOR_AP_IDS)
                if not excluded:
                    logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
                    return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it.
        # sort to be deterministic
        known = sorted({p for p in PROTOCOLS.values() if p},
                       key=lambda p: p.LABEL)
        candidates = []
        for proto in known:
            verdict = proto.owns_id(id)
            if verdict:
                logger.debug(f'  {proto.LABEL} owns id {id}')
                return proto
            if verdict is not False:
                # None, ie unclear: keep it as a candidate
                candidates.append(proto)

        if len(candidates) == 1:
            only = candidates[0]
            logger.debug(f'  {only.LABEL} owns id {id}')
            return only

        # step 3: look for existing Objects in the datastore
        #
        # note that we don't currently see if this is a copy id because I have FUD
        # over which Protocol for_id should return in that case...and also because a
        # protocol may already say definitively above that it owns the id, eg ATProto
        # with DIDs and at:// URIs.
        stored = Protocol.load(id, remote=False)
        if stored and stored.source_protocol:
            logger.debug(f'  {stored.key.id()} owned by source_protocol {stored.source_protocol}')
            return PROTOCOLS[stored.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for proto in candidates:
            logger.debug(f'Trying {proto.LABEL}')
            try:
                loaded = proto.load(id, local=False, remote=True)

                if proto.ABBREV == 'web':
                    # for web, if we fetch and get HTML without microformats,
                    # load returns False but the object will be stored in the
                    # datastore with source_protocol web, and in cache. load it
                    # again manually to check for that.
                    loaded = Object.get_by_id(id)
                    if loaded and loaded.source_protocol != 'web':
                        loaded = None

                if loaded:
                    logger.debug(f'  {proto.LABEL} owns id {id}')
                    return proto
            except BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except HTTPException:
                # internal error we generated ourselves; try next protocol
                continue
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
1✔
451

452
    @cached(LRUCache(20000), lock=Lock())
    @staticmethod
    def for_handle(handle):
        """Determines which protocol owns a given handle.

        Tries, in order: each protocol's :meth:`owns_handle`, a stored user
        with this handle, and finally resolving the handle with
        :meth:`handle_to_id`. May incur expensive side effects like resolving
        the handle itself over the network or other discovery.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle
        """
        # TODO: normalize, eg convert domains to lower case
        logger.debug(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: check if any Protocols say conclusively that they own it.
        # sort to be deterministic.
        known = sorted({p for p in PROTOCOLS.values() if p},
                       key=lambda p: p.LABEL)
        candidates = []
        for proto in known:
            verdict = proto.owns_handle(handle)
            if verdict:
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)
            if verdict is not False:
                # None, ie unclear: keep it as a candidate
                candidates.append(proto)

        if len(candidates) == 1:
            only = candidates[0]
            logger.debug(f'  {only.LABEL} owns handle {handle}')
            return (only, None)

        # step 2: look for matching User in the datastore
        for proto in candidates:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue
            if user.status:
                # a stored user with a status set doesn't match any protocol
                return (None, None)
            logger.debug(f'  user {user.key} handle {handle}')
            return (proto, user.key.id())

        # step 3: resolve handle to id
        for proto in candidates:
            resolved_id = proto.handle_to_id(handle)
            if resolved_id:
                logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {resolved_id}')
                return (proto, resolved_id)

        logger.info(f'No matching protocol found for handle {handle} !')
        return (None, None)
1✔
507

508
    @classmethod
1✔
509
    def is_user_at_domain(cls, handle, allow_internal=False):
1✔
510
        """Returns True if handle is formatted ``user@domain.tld``, False otherwise.
511

512
        Example: ``@user@instance.com``
513

514
        Args:
515
          handle (str)
516
          allow_internal (bool): whether the domain can be a Bridgy Fed domain
517
        """
518
        parts = handle.split('@')
1✔
519
        if len(parts) != 2:
1✔
520
            return False
1✔
521

522
        user, domain = parts
1✔
523
        return bool(user and domain
1✔
524
                    and not cls.is_blocklisted(domain, allow_internal=allow_internal))
525

526
    @classmethod
1✔
527
    def bridged_web_url_for(cls, user, fallback=False):
1✔
528
        """Returns the web URL for a user's bridged profile in this protocol.
529

530
        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
531
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``
532

533
        Args:
534
          user (models.User)
535
          fallback (bool): if True, and bridged users have no canonical user
536
            profile URL in this protocol, return the native protocol's profile URL
537

538
        Returns:
539
          str, or None if there isn't a canonical URL
540
        """
541
        if fallback:
1✔
542
            return user.web_url()
1✔
543

544
    @classmethod
    def actor_key(cls, obj, allow_opt_out=False):
        """Returns the :class:`User` key for a given object's author or actor.

        The owner is determined with :func:`granary.as1.get_owner` and then
        converted to a key with :meth:`key_for`.

        Args:
          obj (models.Object)
          allow_opt_out (bool): whether to return a user key if they're opted out

        Returns:
          google.cloud.ndb.key.Key or None:
        """
        owner = as1.get_owner(obj.as1)
        if not owner:
            return None

        return cls.key_for(owner, allow_opt_out=allow_opt_out)
1✔
558

559
    @classmethod
    def bot_user_id(cls):
        """Returns the Web user id for the bot user for this protocol.

        This is the protocol's ``ABBREV`` followed by
        :const:`domains.SUPERDOMAIN`. For example, ``'bsky.brid.gy'`` for
        ATProto.

        Returns:
          str:
        """
        return '{}{}'.format(cls.ABBREV, SUPERDOMAIN)
1✔
569

570
    @classmethod
1✔
571
    def create_for(cls, user):
1✔
572
        """Creates or re-activate a copy user in this protocol.
573

574
        Should add the copy user to :attr:`copies`.
575

576
        If the copy user already exists and active, should do nothing.
577

578
        Args:
579
          user (models.User): original source user. Shouldn't already have a
580
            copy user for this protocol in :attr:`copies`.
581

582
        Raises:
583
          ValueError: if we can't create a copy of the given user in this protocol
584
        """
585
        raise NotImplementedError()
×
586

587
    @classmethod
1✔
588
    def send(to_cls, obj, target, from_user=None, orig_obj_id=None):
1✔
589
        """Sends an outgoing activity.
590

591
        To be implemented by subclasses. Should call
592
        ``to_cls.translate_ids(obj.as1)`` before converting it to this Protocol's
593
        format.
594

595
        NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates a
596
        copy and sends it, it *must* add that copy to the *object*'s (not activity's)
597
        :attr:`copies`, and store it back in the datastore, *in a transaction*!
598

599
        Args:
600
          obj (models.Object): with activity to send
601
          target (str): destination URL to send to
602
          from_user (models.User): user (actor) this activity is from
603
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
604
            that this object refers to, eg replies to or reposts or likes
605

606
        Returns:
607
          bool: True if the activity is sent successfully, False if it is
608
          ignored or otherwise unsent due to protocol logic, eg no webmention
609
          endpoint, protocol doesn't support the activity type. (Failures are
610
          raised as exceptions.)
611

612
        Raises:
613
          werkzeug.HTTPException if the request fails
614
        """
615
        raise NotImplementedError()
×
616

617
    @classmethod
1✔
618
    def fetch(cls, obj, **kwargs):
1✔
619
        """Fetches a protocol-specific object and populates it in an :class:`Object`.
620

621
        Errors are raised as exceptions. If this method returns False, the fetch
622
        didn't fail but didn't succeed either, eg the id isn't valid for this
623
        protocol, or the fetch didn't return valid data for this protocol.
624

625
        To be implemented by subclasses.
626

627
        Args:
628
          obj (models.Object): with the id to fetch. Data is filled into one of
629
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
630
          kwargs: subclass-specific
631

632
        Returns:
633
          bool: True if the object was fetched and populated successfully,
634
          False otherwise
635

636
        Raises:
637
          requests.RequestException, werkzeug.HTTPException,
638
          websockets.WebSocketException, etc: if the fetch fails
639
        """
640
        raise NotImplementedError()
×
641

642
    @classmethod
1✔
643
    def convert(cls, obj, from_user=None, **kwargs):
1✔
644
        """Converts an :class:`Object` to this protocol's data format.
645

646
        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
647
        and ``application/activity+json`` for :class:`ActivityPub`.
648

649
        Just passes through to :meth:`_convert`, then does minor
650
        protocol-independent postprocessing.
651

652
        Args:
653
          obj (models.Object):
654
          from_user (models.User): user (actor) this activity/object is from
655
          kwargs: protocol-specific, passed through to :meth:`_convert`
656

657
        Returns:
658
          converted object in the protocol's native format, often a dict, or None
659
        """
660
        if not obj or not obj.as1:
1✔
661
            return None
1✔
662

663
        id = obj.key.id() if obj.key else obj.as1.get('id')
1✔
664
        is_crud = obj.as1.get('verb') in as1.CRUD_VERBS
1✔
665
        base_obj = as1.get_object(obj.as1) if is_crud else obj.as1
1✔
666
        orig_our_as1 = obj.our_as1
1✔
667

668
        # post-processing for user profiles
669
        if (from_user and from_user.is_profile(obj)
1✔
670
                and PROTOCOLS.get(obj.source_protocol) != cls
671
                and Protocol.for_bridgy_subdomain(id) not in DOMAINS):
672
            # TODO: more systematic way to get this that covers all protocols,
673
            # eg Nostr NIP-05
674
            web_opted_in = (from_user.LABEL == 'web' and
1✔
675
                            (from_user.last_webmention_in
676
                             or from_user.has_redirects
677
                             or from_user.handle_as('atproto') == from_user.key.id()))
678
            if not web_opted_in:
1✔
679
                # mark bridged actors as bots and add "bridged by Bridgy Fed" to
680
                # their bios. (web users are special cased, they don't get the label
681
                # if they've explicitly enabled Bridgy Fed with redirects or
682
                # webmentions.)
683
                cls.add_source_links(obj=obj, from_user=from_user)
1✔
684

685
                # web is currently opt out, so add [Unofficial] to their display name
686
                # to be explicit that they may not have enabled this themselves
687
                if from_user.LABEL == 'web':
1✔
688
                    if obj.our_as1 is orig_our_as1:
1✔
689
                        obj.our_as1 = copy.deepcopy(obj.as1)
×
690
                    actor = as1.get_object(obj.our_as1) if is_crud else obj.our_as1
1✔
691
                    if ((name := actor.get('displayName'))
1✔
692
                            and not name.endswith(' [Unofficial]')):
693
                        actor['displayName'] = f'{name} [Unofficial]'
1✔
694

695
        converted = cls._convert(obj, from_user=from_user, **kwargs)
1✔
696
        obj.our_as1 = orig_our_as1
1✔
697
        return converted
1✔
698

699
    @classmethod
1✔
700
    def _convert(cls, obj, from_user=None, **kwargs):
1✔
701
        """Converts an :class:`Object` to this protocol's data format.
702

703
        To be implemented by subclasses. Implementations should generally call
704
        :meth:`Protocol.translate_ids` (as their own class) before converting to
705
        their format.
706

707
        Args:
708
          obj (models.Object):
709
          from_user (models.User): user (actor) this activity/object is from
710
          kwargs: protocol-specific
711

712
        Returns:
713
          converted object in the protocol's native format, often a dict. May
714
            return the ``{}`` empty dict if the object can't be converted.
715
        """
716
        raise NotImplementedError()
×
717

718
    @classmethod
1✔
719
    def add_source_links(cls, obj, from_user):
        """Appends a "bridged from ... by Bridgy Fed" note to an actor's ``summary``.

        Replaces ``obj.our_as1`` with a deep copy of ``obj.as1`` and then edits
        the actor inside it. The note is HTML when ``cls.HTML_PROFILES`` is
        truthy, plain text otherwise; in the plain text case the existing
        summary is also replaced with its text-only rendering.

        Does nothing more if the summary's text already contains
        ``🌉 bridged``.

        Args:
          cls (Protocol subclass): protocol that the user is bridging into
          obj (models.Object): user's actor/profile object, or a CRUD activity
            wrapping it
          from_user (models.User): user (actor) this activity/object is from
        """
        assert obj and obj.as1
        assert from_user

        obj.our_as1 = copy.deepcopy(obj.as1)
        # for create/update/etc activities, the actor is the inner object
        if obj.type in as1.CRUD_VERBS:
            actor = as1.get_object(obj.as1)
        else:
            actor = obj.as1
        actor.setdefault('objectType', 'person')

        orig_summary = actor.setdefault('summary', '')
        summary_text = html_to_text(orig_summary, ignore_links=True)

        # Check if we've already added source links
        if '🌉 bridged' in summary_text:
            return

        actor_id = actor.get('id')

        url = as1.get_url(actor)
        if not url:
            if from_user.profile_id() == actor_id:
                url = from_user.web_url()
            else:
                url = actor_id

        from web import Web
        bot_user = Web.get_by_id(from_user.bot_user_id())

        if cls.HTML_PROFILES:
            separator = '<br><br>'

            if bot_user and from_user.LABEL not in cls.DEFAULT_ENABLED_PROTOCOLS:
                mention = bot_user.html_link(proto=cls, name=False, handle='short')
                suffix = f', follow {mention} to interact'
            else:
                suffix = f' by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>'

            is_user = from_user.key and actor_id in (from_user.key.id(),
                                                     from_user.profile_id())
            if is_user:
                bridged = f'🌉 <a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a>'
                from_ = f'<a href="{from_user.web_url()}">{from_user.handle}</a>'
            else:
                bridged = '🌉 bridged'
                if url:
                    from_ = util.pretty_link(url)
                else:
                    from_ = '?'

        else:  # plain text
            # TODO: unify with above. which is right?
            if obj.key:
                obj_id = obj.key.id()
            else:
                obj_id = obj.our_as1.get('id')
            is_user = from_user.key and obj_id in (from_user.key.id(),
                                                   from_user.profile_id())
            from_ = (from_user.web_url() if is_user else url) or '?'

            bridged = '🌉 bridged'
            if from_user.LABEL == 'web':
                # link web users to their user pages
                suffix = f': https://{PRIMARY_DOMAIN}{from_user.user_page_path()}'
            elif bot_user and from_user.LABEL not in cls.DEFAULT_ENABLED_PROTOCOLS:
                suffix = f', follow @{bot_user.handle_as(cls)} to interact'
            else:
                suffix = f' by https://{PRIMARY_DOMAIN}/'

            separator = '\n\n'
            orig_summary = summary_text

        logo = ''
        if from_user.LOGO_EMOJI:
            logo = f'{from_user.LOGO_EMOJI} '

        # only put a separator in front if there's existing summary text
        lead = separator if orig_summary else ""
        actor['summary'] = orig_summary + f'{lead}{bridged} from {logo}{from_}{suffix}'
1✔
792

793
    @classmethod
1✔
794
    def set_username(to_cls, user, username):
        """Sets a custom username for a user's bridged account in this protocol.

        Abstract hook: this base implementation only raises
        ``NotImplementedError``.

        Args:
          user (models.User)
          username (str)

        Raises:
          ValueError: if the username is invalid
          RuntimeError: if the username could not be set
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError
1✔
806

807
    @classmethod
1✔
808
    def migrate_out(cls, user, to_user_id):
        """Migrates a bridged account out to be a native account.

        Abstract hook: this base implementation only raises
        ``NotImplementedError``.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: eg if this protocol doesn't own ``to_user_id``, or if
            ``user`` is on this protocol or not bridged to this protocol
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError
×
820

821
    @classmethod
1✔
822
    def check_can_migrate_out(cls, user, to_user_id):
        """Raises an exception if a user can't yet migrate to a native account.

        The checks, in order: ``to_user_id`` must not be an id that this
        protocol explicitly doesn't own, ``user`` must not already be on this
        protocol, and ``user`` must currently be bridged to this protocol. The
        first failing check is logged as a warning and raised.

        If the user is ready to migrate, returns ``None``.

        Subclasses may override this to add more criteria, but they should call
        this implementation first.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: if ``user`` isn't ready to migrate to this protocol yet
        """
        problem = None

        if cls.owns_id(to_user_id) is False:
            problem = f"{to_user_id} doesn't look like an {cls.LABEL} id"
        elif isinstance(user, cls):
            problem = f"{user.handle_or_id()} is on {cls.PHRASE}"
        elif not user.is_enabled(cls):
            problem = f"{user.handle_or_id()} isn't currently bridged to {cls.PHRASE}"

        if problem is not None:
            logger.warning(problem)
            raise ValueError(problem)
1✔
850

851
    @classmethod
1✔
852
    def migrate_in(cls, user, from_user_id, **kwargs):
        """Migrates a native account in to be a bridged account.

        The protocol independent parts are done here; protocol-specific parts are
        done in :meth:`_migrate_in`, which this wraps.

        Reloads the user's profile before calling :meth:`_migrate_in`. The
        reload is best effort: if it fails with an HTTP/request error, the
        failure is logged and the migration continues.

        After :meth:`_migrate_in` succeeds, adds this protocol to the user's
        ``enabled_protocols`` and stores the user. If the user has a profile
        object, this also enqueues a ``receive`` task for it and, when
        ``cls.HAS_COPIES`` is set, first replaces its copies on this protocol
        with one pointing at ``from_user_id``'s profile id.

        Args:
          user (models.User): native user on another protocol to attach the
            newly imported bridged account to
          from_user_id (str)
          kwargs: additional protocol-specific parameters

        Raises:
          ValueError: eg if this protocol doesn't own ``from_user_id``, or if
            ``user`` is on this protocol or already bridged to this protocol
        """
        def _error(msg):
            logger.warning(msg)
            raise ValueError(msg)

        logger.info(f"Migrating in {from_user_id} for {user.key.id()}")

        # check req'ts
        if cls.owns_id(from_user_id) is False:
            _error(f"{from_user_id} doesn't look like an {cls.LABEL} id")
        elif isinstance(user, cls):
            _error(f"{user.handle_or_id()} is on {cls.PHRASE}")
        elif cls.HAS_COPIES and cls.LABEL in user.enabled_protocols:
            _error(f"{user.handle_or_id()} is already bridged to {cls.PHRASE}")

        # reload profile. best effort: a failure here shouldn't block the
        # migration, but log it so that it isn't lost silently
        try:
            user.reload_profile()
        except (RequestException, HTTPException) as e:
            _, msg = util.interpret_http_exception(e)
            logger.warning(f"Couldn't reload profile for {user.key.id()}, continuing: {msg or e}")

        # migrate!
        cls._migrate_in(user, from_user_id, **kwargs)
        user.add('enabled_protocols', cls.LABEL)
        user.put()

        # attach profile object
        if user.obj:
            if cls.HAS_COPIES:
                profile_id = ids.profile_id(id=from_user_id, proto=cls)
                user.obj.remove_copies_on(cls)
                user.obj.add('copies', Target(uri=profile_id, protocol=cls.LABEL))
                user.obj.put()

            common.create_task(queue='receive', obj_id=user.obj_key.id(),
                               authed_as=user.key.id())
905

906
    @classmethod
1✔
907
    def _migrate_in(cls, user, from_user_id, **kwargs):
        """Protocol-specific parts of migrating in an external account.

        Abstract hook: this base implementation only raises
        ``NotImplementedError``.

        Called by :meth:`migrate_in`, which does the protocol independent work,
        including attempting to reload the user's profile before this.

        Args:
          user (models.User): native user on another protocol to attach the
            newly imported account to
          from_user_id (str): id of the account to be migrated in
          kwargs: protocol dependent

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError
×
920

921
    @classmethod
1✔
922
    def target_for(cls, obj, shared=False):
        """Returns an :class:`Object`'s delivery target (endpoint).

        Abstract hook: this base implementation only raises
        ``NotImplementedError``; subclasses implement it.

        Examples:

        * If obj has ``source_protocol`` ``web``, returns its URL, as a
          webmention target.
        * If obj is an ``activitypub`` actor, returns its inbox.
        * If obj is an ``activitypub`` object, returns its author's or actor's
          inbox.

        Args:
          obj (models.Object):
          shared (bool): optional. If True, returns a common/shared
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
            multiple recipients for efficiency

        Returns:
          str: target endpoint, or None if not available.

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError
×
945

946
    @classmethod
1✔
947
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        Default implementation here, subclasses may override.

        The blocklist always includes ``DOMAIN_BLOCKLIST``. Outside of
        ``DEBUG`` mode it also includes reserved and local TLDs, and unless
        ``allow_internal`` is set it also includes our own ``DOMAINS``.

        Args:
          url (str):
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          result of :func:`util.domain_or_parent_in` for ``url`` against the
            assembled blocklist
        """
        # start from a fresh tuple so that the += below always rebinds this
        # local and can never extend the shared module-level DOMAIN_BLOCKLIST
        # in place, whatever sequence type it is
        blocklist = tuple(DOMAIN_BLOCKLIST)
        if not DEBUG:
            blocklist += tuple(util.RESERVED_TLDS | util.LOCAL_TLDS)
        if not allow_internal:
            blocklist += tuple(DOMAINS)
        return util.domain_or_parent_in(url, blocklist)
1✔
963

964
    @classmethod
1✔
965
    def translate_ids(to_cls, obj):
        """Translates all ids in an AS1 object to a specific protocol.

        Infers source protocol for each id value separately.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``bcc``
        * ``bto``
        * ``cc``
        * ``featured[].items``, ``featured[].orderedItems``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``object.object``
        * ``attachments[].id``
        * ``tags[objectType=mention].url``
        * ``to``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        Works on a deep copy; the ``obj`` passed in is not modified.

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        Args:
          to_cls (Protocol subclass): protocol to translate ids into. Must not
            be :class:`Protocol` itself.
          obj (dict): AS1 object or activity (not :class:`models.Object`!)

        Returns:
          dict: translated AS1 version of ``obj``, or ``obj`` itself if it's
            empty
        """
        from ui import UIProtocol

        assert to_cls != Protocol
        if not obj:
            return obj

        outer_obj = to_cls.translate_mention_handles(copy.deepcopy(obj))
        inner_objs = as1.get_objects(outer_obj)
        outer_obj['object'] = inner_objs

        audience_fields = ('to', 'cc', 'bcc', 'bto')
        always_list_fields = ('items', 'orderedItems')

        def translate(elem, field, fn, uri=False):
            """Translates the id(s) in ``elem[field]``, in place, using ``fn``."""
            owner_id = as1.get_owner(elem)
            owner_proto = Protocol.for_id(owner_id)

            elem[field] = as1.get_objects(elem, field)
            for val in elem[field]:
                val_id = val.get('id')
                if not val_id:
                    continue
                if field in audience_fields and as1.is_audience(val_id):
                    continue

                from_cls = Protocol.for_id(val_id)
                if field == 'id' and from_cls == UIProtocol and owner_proto:
                    logger.info(f'owner of {val_id} {owner_id} is {owner_proto.LABEL}, translating id from that protocol')
                    from_cls = owner_proto

                # TODO: what if from_cls is None? relax translate_object_id,
                # make it a noop if we don't know enough about from/to?
                if from_cls and from_cls != to_cls:
                    val['id'] = fn(id=val_id, from_=from_cls, to=to_cls)
                if uri:
                    if val['id']:
                        val['id'] = to_cls(id=val['id']).id_uri()
                    else:
                        val['id'] = val_id

            # collapse values that only have an id down to the bare id string
            elem[field] = [v['id'] if v.keys() == {'id'} else v
                           for v in elem[field]]

            if field not in always_list_fields and len(elem[field]) == 1:
                elem[field] = elem[field][0]

        outer_type = as1.object_type(outer_obj)
        if outer_type in as1.ACTOR_TYPES:
            translate(outer_obj, 'id', ids.translate_user_id)
        else:
            translate(outer_obj, 'id', ids.translate_object_id)

        for inner in inner_objs:
            is_actor = (as1.object_type(inner) in as1.ACTOR_TYPES
                        or as1.get_owner(outer_obj) == inner.get('id')
                        or outer_type in ('follow', 'stop-following', 'block'))
            if is_actor:
                translate(inner, 'id', ids.translate_user_id)
            else:
                translate(inner, 'id', ids.translate_object_id)

            # TODO: need to handle both user and object ids here
            # https://github.com/snarfed/bridgy-fed/issues/2281
            if inner.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT:
                translate(inner, 'object', ids.translate_user_id)
            else:
                translate(inner, 'object', ids.translate_object_id)

        for o in [outer_obj] + inner_objs:
            translate(o, 'inReplyTo', ids.translate_object_id)

            for field in ('actor', 'author', 'to', 'cc', 'bto', 'bcc'):
                translate(o, field, ids.translate_user_id)

            for tag in as1.get_objects(o, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', ids.translate_user_id, uri=True)

            for att in as1.get_objects(o, 'attachments'):
                translate(att, 'id', ids.translate_object_id)
                url = att.get('url')
                # no id, so try to derive one from the url
                if url and not att.get('id'):
                    from_cls = Protocol.for_id(url)
                    if from_cls:
                        att['id'] = ids.translate_object_id(from_=from_cls, to=to_cls,
                                                            id=url)

            feat = as1.get_object(o, 'featured')
            if feat:
                translate(feat, 'orderedItems', ids.translate_object_id)
                translate(feat, 'items', ids.translate_object_id)

        outer_obj = util.trim_nulls(outer_obj)

        objs = util.get_list(outer_obj, 'object')
        if objs:
            collapsed = [o['id'] if o.keys() == {'id'} else o for o in objs]
            outer_obj['object'] = collapsed[0] if len(collapsed) == 1 else collapsed

        return outer_obj
1✔
1086

1087
    @classmethod
1✔
1088
    def translate_mention_handles(cls, obj):
        """Translates @-mentions in ``obj.content`` to this protocol's handles.

        Specifically, for each ``mention`` tag in the object's tags that has
        ``startIndex`` and ``length``, replaces it in ``obj.content`` with that
        user's translated handle in this protocol and updates the tag's location.
        Other indexed tags are shifted to account for the replacements. If two
        indexed tags overlap, the later one's indices are removed.

        Recurses into the object's inner ``object`` value(s) too.

        Called by :meth:`Protocol.translate_ids`.

        If ``obj.content`` is HTML, does nothing.

        Args:
          obj (dict): AS1 object

        Returns:
          dict: modified copy of the AS1 object, or None if ``obj`` is empty
        """
        if not obj:
            return None

        obj = copy.deepcopy(obj)
        obj['object'] = [cls.translate_mention_handles(o)
                         for o in as1.get_objects(obj)]
        if len(obj['object']) == 1:
            obj['object'] = obj['object'][0]

        content = obj.get('content')
        tags = obj.get('tags')
        if (not content or not tags
                or obj.get('content_is_html')
                or bool(BeautifulSoup(content, 'html.parser').find())
                or HTML_ENTITY_RE.search(content)):
            return util.trim_nulls(obj)

        # startIndex 0 is valid, it's a tag at the very beginning of the
        # content, so compare against None instead of checking truthiness
        indexed = [tag for tag in tags
                   if tag.get('startIndex') is not None and tag.get('length')]

        # running difference in length between the new and original content
        offset = 0
        last_orig_end = 0
        for tag in sorted(indexed, key=lambda t: t['startIndex']):
            orig_start = tag['startIndex']
            if orig_start < last_orig_end:
                logger.warning(f'tags overlap! removing indices from {tag.get("url")}')
                del tag['startIndex']
                del tag['length']
                continue

            orig_end = orig_start + tag['length']
            last_orig_end = orig_end
            tag['startIndex'] += offset
            # url may be missing; skip those mention tags instead of raising
            if tag.get('objectType') == 'mention' and (id := tag.get('url')):
                if proto := Protocol.for_id(id):
                    id = ids.normalize_user_id(id=id, proto=proto)
                    if key := get_original_user_key(id):
                        user = key.get()
                    else:
                        user = proto.get_or_create(id, allow_opt_out=True)
                    if user:
                        start = tag['startIndex']
                        end = start + tag['length']
                        if handle := user.handle_as(cls):
                            content = content[:start] + handle + content[end:]
                            offset += len(handle) - tag['length']
                            tag.update({
                                'displayName': handle,
                                'length': len(handle),
                            })

        obj['tags'] = tags
        as2.set_content(obj, content)  # sets content *and* contentMap; obj is still AS1 here
        return util.trim_nulls(obj)
1✔
1158

1159
    @classmethod
1✔
1160
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
1✔
1161
        """Handles an incoming activity.
1162

1163
        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
1164
        unset, returns HTTP 299.
1165

1166
        Args:
1167
          obj (models.Object)
1168
          authed_as (str): authenticated actor id who sent this activity
1169
          internal (bool): whether to allow activity ids on internal domains,
1170
            from opted out/blocked users, etc.
1171
          received_at (datetime): when we first saw (received) this activity.
1172
            Right now only used for monitoring.
1173

1174
        Returns:
1175
          (str, int) tuple: (response body, HTTP status code) Flask response
1176

1177
        Raises:
1178
          werkzeug.HTTPException: if the request is invalid
1179
        """
1180
        # check some invariants
1181
        assert from_cls != Protocol
1✔
1182
        assert isinstance(obj, Object), obj
1✔
1183

1184
        if not obj.as1:
1✔
1185
            error('No object data provided')
1✔
1186

1187
        orig_obj = obj
1✔
1188
        id = None
1✔
1189
        if obj.key and obj.key.id():
1✔
1190
            id = obj.key.id()
1✔
1191

1192
        if not id:
1✔
1193
            id = obj.as1.get('id')
1✔
1194
            obj.key = ndb.Key(Object, id)
1✔
1195

1196
        if not id:
1✔
1197
            error('No id provided')
×
1198
        elif from_cls.owns_id(id) is False:
1✔
1199
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
1✔
1200
        elif from_cls.is_blocklisted(id, allow_internal=internal):
1✔
1201
            error(f'{id} is blocklisted')
1✔
1202

1203
        # does this protocol support this activity/object type?
1204
        from_cls.check_supported(obj, 'receive')
1✔
1205

1206
        # lease this object, atomically
1207
        memcache_key = activity_id_memcache_key(id)
1✔
1208
        leased = memcache.memcache.add(
1✔
1209
            memcache_key, 'leased', noreply=False,
1210
            expire=int(MEMCACHE_LEASE_EXPIRATION.total_seconds()))
1211

1212
        # short circuit if we've already seen this activity id
1213
        if ('force' not in request.values
1✔
1214
            and (not leased
1215
                 or (obj.new is False and obj.changed is False))):
1216
            error(f'Already seen', status=204)
1✔
1217

1218
        pruned = {k: v for k, v in obj.as1.items()
1✔
1219
                  if k not in ('contentMap', 'replies', 'signature')}
1220
        delay = ''
1✔
1221
        retry = request.headers.get('X-AppEngine-TaskRetryCount')
1✔
1222
        if (received_at and retry in (None, '0')
1✔
1223
                and obj.type not in ('delete', 'undo')):  # we delay deletes/undos
1224
            delay_s = int((util.now().replace(tzinfo=None)
1✔
1225
                           - received_at.replace(tzinfo=None)
1226
                           ).total_seconds())
1227
            delay = f'({delay_s} s behind)'
1✔
1228
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')
1✔
1229

1230
        # check authorization
1231
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
1232
        actor = as1.get_owner(obj.as1)
1✔
1233
        if not actor:
1✔
1234
            error('Activity missing actor or author')
1✔
1235

1236
        if not (from_user_cls := obj.owner_protocol()):
1✔
1237
            error(f"couldn't determine owner protocol for {obj.key.id()} source_protocol {obj.source_protocol}", status=204)
×
1238
        elif from_user_cls.owns_id(actor) is False:
1✔
1239
            error(f"{from_user_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)
1✔
1240

1241
        assert authed_as
1✔
1242
        assert isinstance(authed_as, str)
1✔
1243
        authed_as = ids.normalize_user_id(id=authed_as, proto=from_user_cls)
1✔
1244
        actor = ids.normalize_user_id(id=actor, proto=from_user_cls)
1✔
1245
        if actor != authed_as and not internal:
1✔
1246
            report_error("Auth: receive: authed_as doesn't match owner",
1✔
1247
                         user=f'{id} authed_as {authed_as} owner {actor}')
1248
            error(f"actor {actor} isn't authed user {authed_as}")
1✔
1249

1250
        # update copy ids to originals
1251
        obj.normalize_ids()
1✔
1252
        obj.resolve_ids()
1✔
1253

1254
        if (obj.type == 'follow'
1✔
1255
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
1256
            # follows of bot user; refresh user profile first
1257
            logger.info(f'Follow of bot user, reloading {actor}')
1✔
1258
            from_user = from_user_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
1259
            from_user.reload_profile()
1✔
1260
        else:
1261
            # load actor user
1262
            from_user = from_user_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
1263

1264
        if not internal and (not from_user or from_user.manual_opt_out):
1✔
1265
            error(f"Couldn't load actor {actor}", status=204)
×
1266

1267
        # apply protocol-specific filters
1268
        if 'force' not in request.values:
1✔
1269
            for filter in from_cls.RECEIVE_FILTERS:
1✔
1270
                if filter(obj, from_user):
1✔
1271
                    error(f'Activity {id} blocked by filter {filter.__name__}')
1✔
1272

1273
        # check if this is a profile object coming in via a user with use_instead
1274
        # set. if so, override the object's id to be the final user id (from_user's),
1275
        # after following use_instead.
1276
        if obj.type in as1.ACTOR_TYPES and from_user.key.id() != actor:
1✔
1277
            as1_id = obj.as1.get('id')
1✔
1278
            if ids.normalize_user_id(id=as1_id, proto=from_user) == actor:
1✔
1279
                logger.info(f'Overriding AS1 object id {as1_id} with Object id {from_user.profile_id()}')
1✔
1280
                obj.our_as1 = {**obj.as1, 'id': from_user.profile_id()}
1✔
1281

1282
        # if this is an object, ie not an activity, wrap it in a create or update
1283
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as,
1✔
1284
                                          from_user=from_user)
1285
        obj.add('users', from_user.key)
1✔
1286

1287
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1288
        inner_obj_id = inner_obj_as1.get('id')
1✔
1289
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
1✔
1290
            if not inner_obj_id:
1✔
1291
                error(f'{obj.type} object has no id!')
1✔
1292

1293
        # check age. we support backdated posts, but if they're over 2w old, we
1294
        # don't deliver them
1295
        if obj.type == 'post':
1✔
1296
            if published := inner_obj_as1.get('published'):
1✔
1297
                try:
1✔
1298
                    published_dt = util.parse_iso8601(published)
1✔
1299
                    if not published_dt.tzinfo:
1✔
1300
                        published_dt = published_dt.replace(tzinfo=timezone.utc)
×
1301
                    age = util.now() - published_dt
1✔
1302
                    if (age > CREATE_MAX_AGE
1✔
1303
                            and 'force' not in request.values
1304
                            and not util.domain_or_parent_in(
1305
                                from_user.key.id(), CREATE_MAX_AGE_EXEMPT_DOMAINS)):
1306
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
×
1307
                              status=204)
1308
                except ValueError:  # from parse_iso8601
×
1309
                    logger.debug(f"Couldn't parse published {published}")
×
1310

1311
        # write Object to datastore
1312
        if obj.type in STORE_AS1_TYPES:
1✔
1313
            obj.put()
1✔
1314

1315
        # store inner object
1316
        # TODO: unify with big obj.type conditional below. would have to merge
1317
        # this with the DM handling block lower down.
1318
        crud_obj = None
1✔
1319
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
1✔
1320
            # normalize_ids may have converted the inner object id to a user id
1321
            # (eg Web profile URL to domain), so normalize back to the profile
1322
            # object id to find the right existing Object in the datastore
1323
            crud_obj_id = (ids.normalize_object_id(id=inner_obj_id, proto=from_cls)
1✔
1324
                           or inner_obj_id)
1325
            crud_obj = Object.get_or_create(crud_obj_id, our_as1=inner_obj_as1,
1✔
1326
                                            source_protocol=obj.source_protocol,
1327
                                            authed_as=actor, users=[from_user.key],
1328
                                            deleted=False)
1329

1330
        actor = as1.get_object(obj.as1, 'actor')
1✔
1331
        actor_id = actor.get('id')
1✔
1332

1333
        # handle activity!
1334
        if obj.type == 'stop-following':
1✔
1335
            # TODO: unify with handle_follow?
1336
            # TODO: handle multiple followees
1337
            if not actor_id or not inner_obj_id:
1✔
1338
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
×
1339

1340
            # deactivate Follower
1341
            from_ = from_user_cls.key_for(actor_id)
1✔
1342
            if not (to_cls := Protocol.for_id(inner_obj_id)):
1✔
1343
                error(f"Can't determine protocol for {inner_obj_id} , giving up")
1✔
1344
            to = to_cls.key_for(inner_obj_id)
1✔
1345
            follower = Follower.query(Follower.to == to,
1✔
1346
                                      Follower.from_ == from_,
1347
                                      Follower.status == 'active').get()
1348
            if follower:
1✔
1349
                follow_id = obj.as1.get('followId')
1✔
1350
                if (follow_id and follower.follow
1✔
1351
                        and follower.follow.id() != follow_id):
1352
                    logger.info(f"Ignoring stop-following: its follow id {follow_id} doesn't match current {follower.follow.id()}")
1✔
1353
                    return 'OK', 204
1✔
1354
                logger.info(f'Marking {follower} inactive')
1✔
1355
                follower.status = 'inactive'
1✔
1356
                follower.put()
1✔
1357
            else:
1358
                logger.warning(f'No Follower found for {from_} => {to}')
1✔
1359

1360
            # fall through to deliver to followee
1361
            # TODO: do we convert stop-following to webmention 410 of original
1362
            # follow?
1363

1364
            # fall through to deliver to followers
1365

1366
        elif obj.type in ('delete', 'undo'):
1✔
1367
            delete_obj_id = (from_user.profile_id()
1✔
1368
                            if inner_obj_id == from_user.key.id()
1369
                            else inner_obj_id)
1370

1371
            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
1✔
1372
            if not delete_obj:
1✔
1373
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
1✔
1374
                return 'OK', 204
1✔
1375

1376
            # TODO: just delete altogether!
1377
            logger.info(f'Marking Object {delete_obj_id} deleted')
1✔
1378
            delete_obj.deleted = True
1✔
1379
            delete_obj.put()
1✔
1380

1381
            # if this is an actor, handle deleting it later so that
1382
            # in case it's from_user, user.enabled_protocols is still populated
1383
            #
1384
            # fall through to deliver to followers and delete copy if necessary.
1385
            # should happen via protocol-specific copy target and send of
1386
            # delete activity.
1387
            # https://github.com/snarfed/bridgy-fed/issues/63
1388

1389
        elif obj.type == 'block':
1✔
1390
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
1✔
1391
                # blocking protocol bot user disables that protocol
1392
                from_user.delete(proto)
1✔
1393
                from_user.disable_protocol(proto)
1✔
1394
                return 'OK', 200
1✔
1395

1396
        elif obj.type == 'move':
1✔
1397
            from_cls.handle_move(obj, from_user=from_user)
1✔
1398
            # fall through to deliver the Move activity to remaining followers
1399

1400
        elif obj.type == 'post':
1✔
1401
            # handle DMs to bot users
1402
            if as1.is_dm(obj.as1):
1✔
1403
                return dms.receive(from_user=from_user, obj=obj)
1✔
1404

1405
        # fetch actor if necessary
1406
        is_user = from_user.is_profile(orig_obj)
1✔
1407
        if (actor and actor.keys() == set(['id'])
1✔
1408
                and not is_user and obj.type not in ('delete', 'undo')):
1409
            logger.debug('Fetching actor so we have name, profile photo, etc')
1✔
1410
            actor_obj = from_user_cls.load(
1✔
1411
                ids.profile_id(id=actor['id'], proto=from_cls), raise_=False)
1412
            if actor_obj and actor_obj.as1:
1✔
1413
                obj.our_as1 = {
1✔
1414
                    **obj.as1, 'actor': {
1415
                        **actor_obj.as1,
1416
                        # override profile id with actor id
1417
                        # https://github.com/snarfed/bridgy-fed/issues/1720
1418
                        'id': actor['id'],
1419
                    }
1420
                }
1421

1422
        # fetch object if necessary
1423
        if (obj.type in ('post', 'update', 'share')
1✔
1424
                and inner_obj_as1.keys() == set(['id'])
1425
                and from_cls.owns_id(inner_obj_id) is not False):
1426
            logger.debug('Fetching inner object')
1✔
1427
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
1✔
1428
                                      remote=(obj.type in ('post', 'update')))
1429
            if obj.type in ('post', 'update'):
1✔
1430
                crud_obj = inner_obj
1✔
1431
            if inner_obj and inner_obj.as1:
1✔
1432
                obj.our_as1 = {
1✔
1433
                    **obj.as1,
1434
                    'object': {
1435
                        **inner_obj_as1,
1436
                        **inner_obj.as1,
1437
                    }
1438
                }
1439
            elif obj.type in ('post', 'update'):
1✔
1440
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")
1✔
1441

1442
        if obj.type == 'follow':
1✔
1443
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
1✔
1444
                # follow of one of our protocol bot users; enable that protocol.
1445
                # fall through so that we send an accept.
1446
                try:
1✔
1447
                    from_user.enable_protocol(proto)
1✔
1448
                except ErrorButDoNotRetryTask:
1✔
1449
                    from web import Web
1✔
1450
                    bot = Web.get_by_id(proto.bot_user_id())
1✔
1451
                    from_cls.respond_to_follow('reject', follower=from_user,
1✔
1452
                                               followee=bot, follow=obj)
1453
                    raise
1✔
1454
                proto.bot_maybe_follow_back(from_user)
1✔
1455
                from_cls.handle_follow(obj, from_user=from_user)
1✔
1456
                return 'OK', 202
1✔
1457

1458
            from_cls.handle_follow(obj, from_user=from_user)
1✔
1459

1460
        # on update of the user's own actor/profile, set user.obj and store user back
1461
        # to datastore so that we recalculate computed properties like status etc
1462
        if is_user:
1✔
1463
            if obj.type == 'update' and crud_obj:
1✔
1464
                logger.info(f"update of the user's profile, re-storing user with obj_key {crud_obj.key.id()}")
1✔
1465
                from_user.obj = crud_obj
1✔
1466
                from_user.put()
1✔
1467

1468
        # deliver to targets
1469
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)
1✔
1470

1471
        # on user deleting themselves, deactivate their followers/followings.
1472
        # https://github.com/snarfed/bridgy-fed/issues/1304
1473
        #
1474
        # do this *after* delivering because delivery finds targets based on
1475
        # stored Followers
1476
        if is_user and obj.type == 'delete':
1✔
1477
            for proto in from_user.enabled_protocols:
1✔
1478
                from_user.disable_protocol(PROTOCOLS[proto])
1✔
1479

1480
            logger.info(f'Deactivating Followers from or to {from_user.key.id()}')
1✔
1481
            followers = Follower.query(
1✔
1482
                OR(Follower.to == from_user.key, Follower.from_ == from_user.key)
1483
            ).fetch()
1484
            for f in followers:
1✔
1485
                f.status = 'inactive'
1✔
1486
            ndb.put_multi(followers)
1✔
1487

1488
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
1✔
1489
        return resp
1✔
1490

1491
    @classmethod
    def handle_follow(from_cls, obj, from_user):
        """Handles an incoming follow activity.

        For each followee in the activity, stores a :class:`Follower` and
        sends an ``Accept`` back, but doesn't send the ``Follow`` itself. That
        happens in :meth:`deliver`.

        Followees on the same protocol as ``from_cls``, and followees for
        which ``key_for`` returns nothing, are skipped. Missing objects or
        ids, an undeterminable followee protocol, and a followee that doesn't
        exist or isn't enabled for ``from_cls`` are all reported via
        ``error()`` (presumably raising and aborting the request; confirm
        against its definition).

        Args:
          obj (models.Object): follow activity
          from_user (models.User): user (actor) this activity is from, ie
            the follower
        """
        logger.debug('Got follow. storing Follow(s), sending accept(s)')
        from_id = from_user.key.id()

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls:
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_key = to_cls.key_for(to_id)
            if not to_key:
                logger.info(f'Skipping invalid {to_cls.LABEL} user key: {to_id}')
                continue

            to_user = to_cls.get_or_create(id=to_key.id())
            if not to_user or not to_user.is_enabled(from_cls):
                error(f'{to_id} not found')

            # called only for its effect; the returned Follower isn't needed here
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')

            # remember on the followee that it now has a follower on an
            # object-feed protocol, storing the user only when that's new
            if (from_cls.USES_OBJECT_FEED
                    and from_cls.LABEL not in to_user.has_object_feed_followers_on):
                to_user.has_object_feed_followers_on.append(from_cls.LABEL)
                to_user.put()

            obj.add('notify', to_key)
            from_cls.respond_to_follow('accept', follower=from_user,
                                       followee=to_user, follow=obj)
1543

1544
    @classmethod
    def respond_to_follow(_, verb, follower, followee, follow):
        """Queues an accept or reject activity in response to a follow.

        Does nothing if ``verb`` isn't in the follower's
        ``SUPPORTED_AS1_TYPES``, ie if the follower's protocol doesn't support
        accepts/rejects.

        Args:
          verb (str): ``accept`` or ``reject``
          follower (models.User): user who sent the follow; the response is
            sent to their delivery target
          followee (models.User): user who was followed; becomes the
            response's actor
          follow (models.Object): follow activity being responded to
        """
        assert verb in ('accept', 'reject')
        if verb not in follower.SUPPORTED_AS1_TYPES:
            return

        # the response goes to wherever the follower's profile object says to
        # deliver; without a profile object there's nowhere to send it
        target = follower.target_for(follower.obj) if follower.obj else None
        if not target:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # send. note that the response embeds the whole follow activity, even
        # if it has multiple followees!
        followee_id = followee.key.id()
        response_id = f'{followee_id}/followers#{verb}-{follow.key.id()}'
        response_as1 = {
            'id': response_id,
            'objectType': 'activity',
            'verb': verb,
            'actor': followee_id,
            'object': follow.as1,
        }
        common.create_task(queue='send', id=response_id, our_as1=response_as1,
                           url=target, protocol=follower.LABEL,
                           user=followee.key.urlsafe())
1576

1577
    @classmethod
    def bot_maybe_follow_back(bot_cls, user):
        """Follows a user back from this protocol's bot user, if needed.

        Only acts when ``user.BOTS_FOLLOW_BACK`` is set, ie when the user's
        protocol needs a follow before it starts sending us their activities
        (eg ActivityPub). Queues a ``send`` task with a follow activity from
        the bot user to ``user``. Does nothing if the user has no profile
        object.

        Args:
          user (User): user to follow back
        """
        if not user.BOTS_FOLLOW_BACK:
            return

        # function-level import, as elsewhere in this file for web.Web
        from web import Web
        bot = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()

        user_id = user.key.id()
        bot_id = bot.key.id()
        logger.info(f'Following {user_id} back from bot user {bot_id}')

        if not user.obj:
            logger.info("  can't follow, user has no profile obj")
            return

        deliver_to = user.target_for(user.obj)
        # the timestamp is part of the follow's id
        follow_id = f'https://{bot_id}/#follow-back-{user_id}-{timestamp}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_id,
            'actor': bot_id,
            'object': user_id,
        }
        common.create_task(queue='send', id=follow_id, our_as1=follow_as1,
                           url=deliver_to, source_protocol='web',
                           protocol=user.LABEL, user=bot.key.urlsafe())
1612

1613
    @classmethod
    def handle_move(from_cls, obj, from_user):
        """Handles an incoming move (account migration) activity.

        Transfers the moving user's bridged copy accounts to the target user,
        then re-points the moving user's active :class:`Follower`s at the
        target user, skipping any that would become same-protocol follows.

        Problems (no target, object that isn't the actor, unknown target
        protocol, target user that can't be created) are reported via
        ``error()``, which presumably raises; confirm against its definition.

        Args:
          obj (models.Object): move activity
          from_user (models.User): user (actor) this activity/object is from,
            ie the account being moved
        """
        target_id = as1.get_id(obj.as1, 'target')
        if not target_id:
            error(f'Move activity requires target. Got: {obj.as1}')

        logger.info(f'Got move activity from {from_user.key.id()} to {target_id}')

        # the account being moved (object) has to be the one moving it (actor)
        actor_id = as1.get_id(obj.as1, 'actor')
        object_id = as1.get_id(obj.as1, 'object')
        if actor_id != object_id:
            error(f"Move activity object {object_id} isn't actor {actor_id}")

        # resolve the destination account's protocol and user
        to_cls = Protocol.for_id(target_id)
        if not to_cls:
            error(f"Couldn't determine protocol for target {target_id}")

        to_user = to_cls.get_or_create(target_id, manual_opt_out=False,
                                       enabled_protocols=from_user.enabled_protocols)
        if not to_user:
            error(f"Couldn't create {to_cls.LABEL} user {target_id}", status=299)

        if from_user.enabled_protocols:
            # hand each bridged copy account over from the old user to the new
            for label in from_user.enabled_protocols:
                proto = PROTOCOLS[label]
                copy_id = from_user.get_copy(proto)
                if copy_id:
                    from_user.remove_copies_on(proto)
                    to_user.add('copies', Target(uri=copy_id, protocol=label))

            from_user.put()
            to_user.put()

        # everyone who currently, actively follows the old account
        followers = Follower.query(
            Follower.to == from_user.key,
            Follower.status == 'active'
        ).fetch()

        logger.info(f'Updating {len(followers)} followers from {actor_id} to {target_id}')
        new_kind = to_user.key.kind()
        moved = []
        for follower in followers:
            # a follower of the same kind as the new account would turn into
            # a same-protocol follow; leave those pointing at the old account
            if follower.from_.kind() == new_kind:
                logger.info(f'Skipping same-protocol follower {follower.from_.id()} => {to_user.key.id()}')
                continue

            follower.to = to_user.key
            moved.append(follower)

        if moved:
            ndb.put_multi(moved)
1✔
1675

1676
    @classmethod
    def handle_bare_object(cls, obj, *, authed_as, from_user):
        """Wraps a bare object in a synthetic create or update activity.

        An object counts as bare if its type is an actor type or one of
        ``note``, ``article``, ``comment``; anything else is returned as is.
        Actors and changed objects are wrapped in an ``update``; new objects,
        or any object when the request has a ``force`` value, are wrapped in a
        ``post``. Otherwise there's nothing to do and ``error()`` is called
        with status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity
            (not read by this method)
          from_user (models.User): user (actor) this activity/object is from
            (not read by this method)

        Returns:
          models.Object: ``obj`` if it isn't a bare object, otherwise a newly
          constructed wrapper activity, which this method does not store
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        owner_id = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        timestamp = util.now().isoformat()
        source_protocol = obj.source_protocol

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            logger.info(
                f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes'
                if obj.changed
                else 'Got actor profile object, wrapping in update')

            update_id = f'{obj.key.id()}#bridgy-fed-update-{timestamp}'
            # Mastodon requires the updated field for Updates, so provide a
            # default that the object's own value, if any, overrides.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            updated_object = {'updated': timestamp, **obj.as1}
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': owner_id,
                'object': updated_object,
            }
            logger.debug(f'  AS1: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=source_protocol)

        if obj.new or 'force' in request.values:
            post_id = f'{obj.key.id()}#bridgy-fed-create-{timestamp}'
            post_as1 = {
                'objectType': 'activity',
                'verb': 'post',
                'id': post_id,
                'actor': owner_id,
                'object': obj.as1,
                'published': timestamp,
            }
            logger.info('Wrapping in post')
            logger.debug(f'  AS1: {json_dumps(post_as1, indent=2)}')
            return Object(id=post_id, our_as1=post_as1,
                          source_protocol=source_protocol)

        error(f'{obj.key.id()} is unchanged, nothing to do', status=204)
×
1739

1740
    @classmethod
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Delivers an activity to its external recipients.

        Finds the delivery targets via :meth:`targets`, stores whichever
        object that call marked dirty, and enqueues one ``send`` task per
        target, in order of target URI.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response; ``('OK', 202)`` once tasks are
          enqueued, or a message with status 204 if there are no targets
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # find delivery targets. maps Target to Object or None
        #
        # ...then write the relevant object, since targets() has a side effect of
        # setting the notify and feed properties (and dirty attribute)
        targets = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if to_proto:
            # distinct loop names so the obj parameter isn't shadowed
            targets = {target: orig_obj for target, orig_obj in targets.items()
                       if target.protocol == to_proto.LABEL}
        if not targets:
            # don't raise via error() because we call deliver in code paths where
            # we want to continue after
            msg = r'No targets, nothing to do ¯\_(ツ)_/¯'
            logger.info(msg)
            return msg, 204

        # store object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif obj.type in STORE_AS1_TYPES and obj.dirty:
            obj.put()

        # stored activity types are passed to the task by id; anything else
        # is passed as obj.to_request()
        obj_params = ({'obj_id': obj.key.id()} if obj.type in STORE_AS1_TYPES
                      else obj.to_request())

        # sort targets so order is deterministic for tests, debugging, etc
        sorted_targets = sorted(targets.items(), key=lambda t: t[0].uri)

        # enqueue a send task for each target
        logger.info(f'Delivering to {" ".join(t.uri for t, _ in sorted_targets)}')
        user = from_user.key.urlsafe()
        for target, orig_obj in sorted_targets:
            orig_obj_id = orig_obj.key.id() if orig_obj else None
            common.create_task(queue='send', url=target.uri, protocol=target.protocol,
                               orig_obj_id=orig_obj_id, user=user, **obj_params)

        return 'OK', 202
1✔
1795

1796
    @classmethod
1✔
1797
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1798
        """Collects the targets to send a :class:`models.Object` to.
1799

1800
        Targets are both objects - original posts, events, etc - and actors.
1801

1802
        Args:
1803
          obj (models.Object)
1804
          from_user (User)
1805
          crud_obj (models.Object): if this is a create, update, or delete/undo
1806
            activity, the inner object that's being written, otherwise None.
1807
            (This object's ``notify`` and ``feed`` properties may be updated.)
1808
          internal (bool): whether this is a recursive internal call
1809

1810
        Returns:
1811
          dict: maps :class:`models.Target` to original (in response to)
1812
          :class:`models.Object`
1813
        """
1814
        logger.debug('Finding recipients and their targets')
1✔
1815

1816
        # we should only have crud_obj iff this is a create or update
1817
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1818
        write_obj = crud_obj or obj
1✔
1819
        write_obj.dirty = False
1✔
1820

1821
        target_uris = as1.targets(obj.as1)
1✔
1822
        orig_obj = None
1✔
1823
        targets = {}  # maps Target (with *normalized* uri) to Object or None
1✔
1824
        owner = as1.get_owner(obj.as1)
1✔
1825
        allow_opt_out = (obj.type == 'delete')
1✔
1826
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1827
        inner_obj_id = inner_obj_as1.get('id')
1✔
1828
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1829
        quoted_posts = as1.quoted_posts(inner_obj_as1)
1✔
1830
        mentioned_urls = as1.mentions(inner_obj_as1)
1✔
1831
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1832
        is_self_reply = False
1✔
1833

1834
        original_ids = []
1✔
1835
        if is_reply:
1✔
1836
            original_ids = in_reply_tos
1✔
1837
        elif inner_obj_id:
1✔
1838
            if inner_obj_id == from_user.key.id():
1✔
1839
                inner_obj_id = from_user.profile_id()
1✔
1840
            original_ids = [inner_obj_id]
1✔
1841

1842
        # maps id to Object
1843
        original_objs = {}
1✔
1844
        for id in original_ids:
1✔
1845
            if proto := Protocol.for_id(id):
1✔
1846
                original_objs[id] = proto.load(id, raise_=False)
1✔
1847

1848
        # for AP, add in-reply-tos' mentions
1849
        # https://github.com/snarfed/bridgy-fed/issues/1608
1850
        # https://github.com/snarfed/bridgy-fed/issues/1218
1851
        orig_post_mentions = {}  # maps mentioned id to original post Object
1✔
1852
        for id in in_reply_tos:
1✔
1853
            if ((in_reply_to_obj := original_objs.get(id))
1✔
1854
                    and (proto := PROTOCOLS.get(in_reply_to_obj.source_protocol))
1855
                    and proto.SEND_REPLIES_TO_ORIG_POSTS_MENTIONS
1856
                    and (mentions := as1.mentions(in_reply_to_obj.as1))):
1857
                logger.info(f"Adding in-reply-to {id} 's mentions to targets: {mentions}")
1✔
1858
                target_uris.extend(mentions)
1✔
1859
                for mention in mentions:
1✔
1860
                    orig_post_mentions[mention] = in_reply_to_obj
1✔
1861

1862
        target_uris = sorted(set(target_uris))
1✔
1863
        logger.info(f'Raw targets: {target_uris}')
1✔
1864

1865
        # which protocols should we allow delivering to?
1866
        to_protocols = []  # elements are Protocol subclasses
1✔
1867
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1868
                      + from_user.enabled_protocols):
1869
            if not (proto := PROTOCOLS.get(label)):
1✔
1870
                report_error(f'unknown enabled protocol {label} for {from_user.key.id()}')
1✔
1871
                continue
1✔
1872

1873
            if (obj.type == 'post' and (orig := original_objs.get(inner_obj_id))
1✔
1874
                    and orig.get_copy(proto)):
1875
                logger.info(f'Already created {id} on {label}, cowardly refusing to create there again')
1✔
1876
                continue
1✔
1877

1878
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1879
                                     or is_reply):
1880
                origs_could_bridge = None
1✔
1881

1882
                for id in original_ids:
1✔
1883
                    if not (orig := original_objs.get(id)):
1✔
1884
                        continue
1✔
1885
                    elif orig.get_copy(proto):
1✔
1886
                        logger.info(f'Allowing {label}, original {id} was bridged there')
1✔
1887
                        break
1✔
1888
                    elif from_user.is_profile(orig):
1✔
1889
                        logger.info(f"Allowing {label}, this is the user's profile")
1✔
1890
                        break
1✔
1891

1892
                    if (origs_could_bridge is not False
1✔
1893
                            and (orig_author_id := as1.get_owner(orig.as1))
1894
                            and (orig_proto := orig.owner_protocol())
1895
                            and (orig_author := orig_proto.get_by_id(orig_author_id))):
1896
                        origs_could_bridge = orig_author.is_enabled(proto)
1✔
1897

1898
                else:
1899
                    msg = f"original object(s) {original_ids} weren't bridged to {label}"
1✔
1900
                    last_retry = False
1✔
1901
                    if retries := request.headers.get(TASK_RETRIES_HEADER):
1✔
1902
                        if (last_retry := int(retries) >= TASK_RETRIES_RECEIVE):
1✔
1903
                            logger.info(f'last retry! skipping {proto.LABEL} and continuing')
1✔
1904

1905
                    if (proto.LABEL not in from_user.DEFAULT_ENABLED_PROTOCOLS
1✔
1906
                            and origs_could_bridge and not last_retry):
1907
                        # retry later; original obj may still be bridging
1908
                        # TODO: limit to brief window, eg no older than 2h? 1d?
1909
                        error(msg, status=304)
1✔
1910

1911
                    logger.info(msg)
1✔
1912
                    continue
1✔
1913

1914
            util.add(to_protocols, proto)
1✔
1915

1916
        logger.info(f'allowed protocols {[p.LABEL for p in to_protocols]}')
1✔
1917

1918
        # process direct targets
1919
        for target_id in target_uris:
1✔
1920
            target_proto = Protocol.for_id(target_id)
1✔
1921
            if not target_proto:
1✔
1922
                logger.info(f"Can't determine protocol for {target_id}")
1✔
1923
                continue
1✔
1924
            elif target_proto.is_blocklisted(target_id):
1✔
1925
                logger.debug(f'{target_id} is blocklisted')
1✔
1926
                continue
1✔
1927

1928
            target_is_actor = (target_id in mentioned_urls
1✔
1929
                               or obj.type in as1.VERBS_WITH_ACTOR_OBJECT)
1930

1931
            target_obj_id = (ids.profile_id(id=target_id, proto=target_proto)
1✔
1932
                             if target_is_actor
1933
                             # not ideal. this can sometimes be a non-user, eg
1934
                             # blocking a blocklist. ok right now since profile_id()
1935
                             # returns its input id unchanged if it doesn't look like
1936
                             # a user id, but that's brittle.
1937
                             else target_id)
1938
            orig_obj = target_proto.load(target_obj_id, raise_=False)
1✔
1939
            if not orig_obj or not orig_obj.as1:
1✔
1940
                logger.info(f"Couldn't load {target_obj_id}")
1✔
1941
                continue
1✔
1942

1943
            target_author_key = (target_proto(id=target_id).key if target_is_actor
1✔
1944
                                 else target_proto.actor_key(orig_obj))
1945

1946
            if not from_user.is_enabled(target_proto):
1✔
1947
                # if author isn't bridged and target user is, DM a prompt and
1948
                # add a notif for the target user
1949
                if (target_id in (in_reply_tos + quoted_posts + mentioned_urls)
1✔
1950
                        and target_author_key):
1951
                    if target_author := target_author_key.get():
1✔
1952
                        if target_author.is_enabled(from_cls):
1✔
1953
                            notifications.add_notification(target_author, write_obj)
1✔
1954
                            verb, noun = (
1✔
1955
                                ('replied to', 'replies') if target_id in in_reply_tos
1956
                                else ('quoted', 'quotes') if target_id in quoted_posts
1957
                                else ('mentioned', 'mentions'))
1958
                            dms.maybe_send(from_=target_proto, to_user=from_user,
1✔
1959
                                           type='replied_to_bridged_user', text=f"""\
1960
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently {verb}</a> {target_author.html_link()}, who's bridged here from {target_proto.PHRASE}. If you want them to see your {noun}, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1961

1962
                continue
1✔
1963

1964
            # deliver self-replies to followers
1965
            # https://github.com/snarfed/bridgy-fed/issues/639
1966
            if target_id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1967
                is_self_reply = True
1✔
1968
                logger.info(f'self reply!')
1✔
1969

1970
            # also add copies' targets
1971
            for copy in orig_obj.copies:
1✔
1972
                proto = PROTOCOLS[copy.protocol]
1✔
1973
                if proto in to_protocols:
1✔
1974
                    # copies generally won't have their own Objects
1975
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1976
                        target = util.normalize_url(target, trailing_slash=False)
1✔
1977
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {target_id}')
1✔
1978
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1979

1980
            if target_proto == from_cls:
1✔
1981
                logger.debug(f'Skipping same-protocol target {target_id}')
1✔
1982
                continue
1✔
1983

1984
            target = target_proto.target_for(orig_obj)
1✔
1985
            if not target:
1✔
1986
                # TODO: surface errors like this somehow?
1987
                logger.error(f"Can't find delivery target for {target_id}")
×
1988
                continue
×
1989

1990
            target = util.normalize_url(target, trailing_slash=False)
1✔
1991
            logger.debug(f'Target for {target_id} is {target} {target_author_key}')
1✔
1992

1993
            # only use orig_obj for inReplyTos, like/repost objects, reply's original
1994
            # post's mentions, etc
1995
            # https://github.com/snarfed/bridgy-fed/issues/1237
1996
            target_obj = None
1✔
1997
            if target_id in in_reply_tos + as1.get_ids(obj.as1, 'object'):
1✔
1998
                target_obj = orig_obj
1✔
1999
            elif target_id in orig_post_mentions:
1✔
2000
                target_obj = orig_post_mentions[target_id]
1✔
2001
            targets[Target(protocol=target_proto.LABEL, uri=target)] = target_obj
1✔
2002

2003
            if target_author_key:
1✔
2004
                logger.debug(f'Recipient is {target_author_key}')
1✔
2005
                if obj.type not in DONT_NOTIFY_TYPES:
1✔
2006
                    if write_obj.add('notify', target_author_key):
1✔
2007
                        write_obj.dirty = True
1✔
2008

2009
        if obj.type == 'undo':
1✔
2010
            logger.info('Object is an undo; adding targets for inner object')
1✔
2011
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
2012
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
2013
            else:
2014
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
2015
            if inner_obj:
1✔
2016
                for target, target_obj in from_cls.targets(
1✔
2017
                        inner_obj, from_user=from_user, internal=True).items():
2018
                    targets[target] = target_obj
1✔
2019
                    util.add(to_protocols, PROTOCOLS[target.protocol])
1✔
2020

2021
        if not to_protocols:
1✔
2022
            return {}
1✔
2023

2024
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
2025

2026
        # deliver to followers, if appropriate
2027
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
2028
        if not user_key:
1✔
2029
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
2030
            return targets
1✔
2031

2032
        # we deliver to HAS_COPIES protocols separately, below. we assume they have
2033
        # follower-independent targets.
2034
        to_followers_protos = [
1✔
2035
            p for p in to_protocols
2036
            if not (p.HAS_COPIES and p.DEFAULT_TARGET)
2037
            and not (p.USES_OBJECT_FEED and p.LABEL not in from_user.has_object_feed_followers_on)]
2038
        followers = []
1✔
2039
        is_undo_block = obj.type == 'undo' and inner_obj_as1.get('verb') == 'block'
1✔
2040
        if (obj.type in ('post', 'update', 'delete', 'move', 'share', 'undo')
1✔
2041
                and (not is_reply or is_self_reply) and not is_undo_block
2042
                and to_followers_protos):
2043
            logger.info(f'Delivering to followers of {user_key.id()} on {[p.LABEL for p in to_followers_protos]}')
1✔
2044
            # query each protocol individually
2045
            for proto in to_followers_protos:
1✔
2046
                kind = proto._get_kind()
1✔
2047
                for f in Follower.query(
1✔
2048
                        Follower.to == user_key,
2049
                        Follower.status == 'active',
2050
                        Follower.from_ >= ndb.Key(kind, '\x00'),
2051
                        Follower.from_ < ndb.Key(kind + '\x00', '\x00')):
2052
                    # skip protocol bot users
2053
                    if not Protocol.for_bridgy_subdomain(f.from_.id()):
1✔
2054
                        followers.append(f)
1✔
2055

2056
            logger.debug(f'  loaded {len(followers)} followers')
1✔
2057

2058
            user_keys = [f.from_ for f in followers]
1✔
2059
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
2060
            logger.debug(f'  loaded {len(users)} users')
1✔
2061

2062
            User.load_multi(users)
1✔
2063
            logger.debug(f'  loaded user objects')
1✔
2064

2065
            if (not followers and
1✔
2066
                (util.domain_or_parent_in(from_user.key.id(), LIMITED_DOMAINS)
2067
                 or util.domain_or_parent_in(obj.key.id(), LIMITED_DOMAINS))):
2068
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
2069
                return {}
1✔
2070

2071
            # add to followers' feeds, if any
2072
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
2073
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
2074
                    write_obj.feed = [u.key for u in users if u.USES_OBJECT_FEED]
1✔
2075
                    if write_obj.feed:
1✔
2076
                        write_obj.dirty = True
1✔
2077

2078
            # collect targets for followers
2079
            target_obj = (original_objs.get(inner_obj_id)
1✔
2080
                          if obj.type == 'share' else None)
2081
            for user in users:
1✔
2082
                if user.is_blocking(from_user):
1✔
2083
                    logger.debug(f'  {user.key.id()} blocks {from_user.key.id()}')
1✔
2084
                    continue
1✔
2085

2086
                # TODO: should we pass remote=False through here to Protocol.load?
2087
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
2088
                if not target:
1✔
2089
                    continue
1✔
2090

2091
                target = util.normalize_url(target, trailing_slash=False)
1✔
2092
                targets[Target(protocol=user.LABEL, uri=target)] = target_obj
1✔
2093

2094
            logger.debug(f'  collected {len(targets)} targets')
1✔
2095

2096
        # deliver to enabled HAS_COPIES protocols proactively
2097
        if obj.type in ('post', 'update', 'delete', 'share'):
1✔
2098
            for proto in to_protocols:
1✔
2099
                if proto.HAS_COPIES and proto.DEFAULT_TARGET:
1✔
2100
                    logger.info(f'user has {proto.LABEL} enabled, adding {proto.DEFAULT_TARGET}')
1✔
2101
                    targets.setdefault(
1✔
2102
                        Target(protocol=proto.LABEL, uri=proto.DEFAULT_TARGET), None)
2103

2104
        # maps string target URL to (Target, Object) tuple
2105
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
2106
        # maps Target to Object or None
2107
        targets = {}
1✔
2108
        source_domains = [
1✔
2109
            util.domain_from_link(url) for url in
2110
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
2111
            if util.is_web(url)
2112
        ]
2113
        for url in sorted(util.dedupe_urls(
1✔
2114
                candidates.keys(),
2115
                # preserve our PDS URL without trailing slash in path
2116
                # https://atproto.com/specs/did#did-documents
2117
                trailing_slash=False)):
2118
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
2119
                logger.info(f'Skipping same-domain target {url}')
×
2120
                continue
×
2121
            elif from_user.is_blocking(url):
1✔
2122
                logger.debug(f'{from_user.key.id()} blocks {url}')
1✔
2123
                continue
1✔
2124

2125
            target, obj = candidates[url]
1✔
2126
            targets[target] = obj
1✔
2127

2128
        return targets
1✔
2129

2130
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, raw=False, csv=False,
             **kwargs):
        """Loads an Object from the datastore and/or a network fetch.

        Sets the :attr:`new` and :attr:`changed` attributes on the returned
        object when they can be determined, ie when ``local`` is True and
        ``remote`` is True or None.

        Args:
          id (str)
          remote (bool): whether to fetch the object over the network. True
            always fetches, even if we have a stored copy, and updates that
            copy. False never fetches, and returns None if nothing is stored.
            None (the default) fetches only if nothing is stored or the
            stored copy is older than ``OBJECT_REFRESH_AGE``.
          local (bool): whether to look in the datastore before fetching. If
            False, a successfully fetched object is still stored afterward.
          raise_ (bool): if False, any :class:`requests.RequestException`,
            :class:`werkzeug.exceptions.HTTPException`, or
            :class:`websockets.exceptions.InvalidStatus` raised by
            :meth:`fetch` is swallowed and ``None`` is returned instead
          raw (bool): whether to use this id as is, without normalizing it to
            an on-protocol object id. Exact meaning varies by subclass.
          csv (bool): whether to specifically load a CSV object
            TODO: merge this into raw, using returned Content-Type?
          kwargs: passed through to :meth:`fetch`

        Returns:
          models.Object: loaded object, or None if it isn't fetchable, eg a
          non-URL string for Web, ``remote`` is False and it isn't in the
          datastore, ``csv`` is set but the object isn't CSV, or the fetched
          entity is larger than ``MAX_ENTITY_SIZE``

        Raises:
          requests.RequestException, werkzeug.exceptions.HTTPException,
          websockets.exceptions.InvalidStatus: whatever :meth:`fetch` raises,
            if ``raise_`` is True
        """
        assert id
        assert local or remote is not False

        if not raw:
            id = ids.normalize_object_id(id=id, proto=cls)

        # step 1: datastore lookup
        obj = None
        orig_as1 = None
        if local:
            obj = Object.get_by_id(id)
            if obj:
                if csv and not obj.is_csv:
                    return None
                if obj.as1 or obj.csv or obj.raw or obj.deleted:
                    obj.new = False

        # step 2: decide whether the stored copy is good enough
        if remote is False:
            return obj
        if remote is None and obj:
            is_stale = obj.updated < util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not is_stale:
                return obj

        # step 3: prepare the entity that fetch() will populate
        if obj:
            # remember the old contents so we can tell below whether it changed
            orig_as1 = obj.as1
            obj.our_as1 = None
            obj.new = False
        elif cls == Protocol:
            # the base class can't fetch anything itself
            return None
        else:
            obj = Object(id=id)
            if local:
                obj.new = True
                obj.changed = False

        # step 4: fetch over the network
        try:
            fetched = cls.fetch(obj, csv=csv, **kwargs)
        except (RequestException, HTTPException, InvalidStatus) as err:
            if raise_:
                raise
            util.interpret_http_exception(err)
            return None

        if not fetched or (csv and not obj.is_csv):
            return None

        # step 5: refuse to store entities that are too large
        # https://stackoverflow.com/a/3042250/186123
        serialized = _entity_to_protobuf(obj)._pb.SerializeToString()
        size = len(serialized)
        if size > MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {MAX_ENTITY_SIZE}')
            return None

        # step 6: clean up, record provenance, and store
        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(orig_as1)

        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
2236

2237
    @classmethod
    def check_supported(cls, obj, direction):
        """Rejects activities that this protocol can't handle, with HTTP 204.

        Each rejection goes through ``error(..., status=204)``. Checks, in
        order: unsupported activity/object types, posts with blank content and
        no media, and (on receive only) unsupported DMs and non-public
        activities. For an unsupported DM, this also tries to send a "DMs
        aren't supported" reply to the sender.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)
          direction (str): ``'receive'`` or  ``'send'``

        Raises:
          werkzeug.HTTPException: if this protocol doesn't support this object
        """
        assert direction in ('receive', 'send')
        if not obj.type:
            return

        # activity type and, for CRUD verbs, inner object type
        supported = cls.SUPPORTED_AS1_TYPES
        inner = as1.get_object(obj.as1)
        inner_type = as1.object_type(inner) or ''
        inner_unsupported = bool(inner_type) and inner_type not in supported
        if (obj.type not in supported
                or (obj.type in as1.CRUD_VERBS and inner_unsupported)):
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # don't allow posts with blank content and no image/video/audio
        crud_obj = inner if obj.type in ('post', 'update') else obj.as1
        if crud_obj.get('objectType') in as1.POST_TYPES:
            has_media = (
                util.get_url(crud_obj, key='image')
                or any(util.get_urls(crud_obj, 'attachments', inner_key='stream')))
            # TODO: handle articles with displayName but not content
            if (not has_media
                    and not source.html_to_text(crud_obj.get('content')).strip()):
                error('Blank content and no image or video or audio', status=204)

        # the remaining checks only apply to incoming activities
        if direction != 'receive':
            return

        recip = as1.recipient_if_dm(obj.as1)
        if not recip:
            # check that this activity is public. only do this for some
            # activities, not eg likes or follows, since Mastodon doesn't
            # currently mark those as explicitly public.
            must_be_public = {'post', 'update'} | as1.POST_TYPES | as1.ACTOR_TYPES
            if (obj.type in must_be_public
                    and not util.domain_or_parent_in(crud_obj.get('id'),
                                                     NON_PUBLIC_DOMAINS)
                    and not as1.is_public(obj.as1, unlisted=False)):
                error('Bridgy Fed only supports public activities', status=204)
            return

        # receiving DMs is only allowed to protocol bot accounts
        owner = as1.get_owner(obj.as1)
        if cls.SUPPORTS_DMS and (recip in common.bot_user_ids()
                                 or owner in common.bot_user_ids()):
            return

        # reply and say DMs aren't supported
        from_proto = obj.owner_protocol()
        to_proto = Protocol.for_id(recip)
        if owner and from_proto and to_proto:
            from_user = from_proto.get_or_create(id=owner)
            to_user = to_proto.get_or_create(id=recip) if from_user else None
            if from_user and to_user:
                if obj.type == 'post':
                    in_reply_to = inner.get('id')
                else:
                    in_reply_to = obj.as1.get('id')
                text = f"Hi! Sorry, this account is bridged from {to_user.PHRASE}, so it doesn't support DMs. Try getting in touch another way!"
                dm_type = f'dms_not_supported-{to_user.key.id()}'
                dms.maybe_send(from_=to_user, to_user=from_user,
                               text=text, type=dm_type,
                               in_reply_to=in_reply_to)

        error("Bridgy Fed doesn't support DMs", status=204)
2305

2306
    @classmethod
    def block(cls, from_user, arg):
        """Blocks a user, list, or domain blocklist on behalf of a user.

        Tries ``arg`` as a user first, then as a list (an object of type
        ``collection``), then hands it to ``from_user.add_domain_blocklist``.
        For a user or list, stores a ``block`` activity and delivers it.

        Args:
          from_user (models.User): user doing the blocking
          arg (str): handle or id of user/list to block

        Returns:
          models.User or models.Object: user or list that was blocked, or
          whatever ``from_user.add_domain_blocklist`` returned if truthy

        Raises:
          ValueError: if arg doesn't look like a user or list on this protocol
        """
        logger.info(f'user {from_user.key.id()} trying to block {arg}')

        def reject(message):
            # log and abort; callers rely on this always raising
            logger.warning(message)
            raise ValueError(message)

        # first, try interpreting as a user handle or id
        blockee = None
        try:
            blockee = load_user(arg, proto=cls, create=True, allow_opt_out=True)
        except (AssertionError, AttributeError, BadRequest, RuntimeError, ValueError) as err:
            logger.info(err)

        if type(from_user) == type(blockee):
            reject(f'{blockee.html_link()} is on {from_user.PHRASE}! Try blocking them there.')

        # may not be a user, see if it's a list
        if not blockee:
            proto = cls
            if not proto or proto == Protocol:
                proto = Protocol.for_id(arg)

            if proto:
                blockee = proto.load(arg)

            is_list = bool(proto and blockee and blockee.type == 'collection')
            if is_list:
                if blockee.source_protocol == from_user.LABEL:
                    reject(f'{blockee.html_link()} is on {from_user.PHRASE}! Try blocking it there.')
            else:
                blocklist = from_user.add_domain_blocklist(arg)
                if blocklist:
                    return blocklist
                reject(f"{arg} doesn't look like a user or list{' on ' + proto.PHRASE if proto else ''}, or we couldn't fetch it")

        # store and deliver the block activity
        blockee_id = blockee.key.id()
        logger.info(f'  blocking {blockee_id}')
        activity_id = f'{from_user.key.id()}#bridgy-fed-block-{util.now().isoformat()}'
        activity = {
            'objectType': 'activity',
            'verb': 'block',
            'id': activity_id,
            'actor': from_user.key.id(),
            'object': blockee_id,
        }
        obj = Object(id=activity_id, source_protocol=from_user.LABEL,
                     our_as1=activity)
        obj.put()
        from_user.deliver(obj, from_user=from_user)

        return blockee
2362

2363
    @classmethod
    def unblock(cls, from_user, arg):
        """Unblocks a user, list, or domain blocklist on behalf of a user.

        Tries ``arg`` as a user first, then as a list (an object of type
        ``collection``), then hands it to
        ``from_user.remove_domain_blocklist``. For a user or list, stores an
        ``undo`` activity wrapping a ``block`` and delivers it.

        Args:
          from_user (models.User): user doing the unblocking
          arg (str): handle or id of user/list to unblock

        Returns:
          models.User or models.Object: user or list that was unblocked, or
          whatever ``from_user.remove_domain_blocklist`` returned if truthy

        Raises:
          ValueError: if arg doesn't look like a user or list on this protocol
        """
        logger.info(f'user {from_user.key.id()} trying to unblock {arg}')

        def fail(msg):
            # log and abort; the code below relies on this always raising
            logger.warning(msg)
            raise ValueError(msg)

        blockee = None
        try:
            # first, try interpreting as a user handle or id
            blockee = load_user(arg, cls, create=True, allow_opt_out=True)
        except (AssertionError, AttributeError, BadRequest, RuntimeError, ValueError) as err:
            logger.info(err)

        # same user class as the requester, ie same protocol: they should
        # unblock natively instead of through the bridge
        if type(from_user) == type(blockee):
            fail(f'{blockee.html_link()} is on {from_user.PHRASE}! Try unblocking them there.')

        # may not be a user, see if it's a list
        if not blockee:
            if not cls or cls == Protocol:
                cls = Protocol.for_id(arg)

            if cls and (blockee := cls.load(arg)) and blockee.type == 'collection':
                if blockee.source_protocol == from_user.LABEL:
                    # this message previously said "blocking", copied from block()
                    fail(f'{blockee.html_link()} is on {from_user.PHRASE}! Try unblocking it there.')
            else:
                if blocklist := from_user.remove_domain_blocklist(arg):
                    return blocklist
                fail(f"{arg} doesn't look like a user or list{' on ' + cls.PHRASE if cls else ''}, or we couldn't fetch it")

        logger.info(f'  unblocking {blockee.key.id()}')
        undo_id = f'{from_user.key.id()}#bridgy-fed-unblock-{util.now().isoformat()}'
        obj = Object(id=undo_id, source_protocol=from_user.LABEL, our_as1={
            'objectType': 'activity',
            'verb': 'undo',
            'id': undo_id,
            'actor': from_user.key.id(),
            # the inner block activity being undone; it has no id of its own
            'object': {
                'objectType': 'activity',
                'verb': 'block',
                'actor': from_user.key.id(),
                'object': blockee.key.id(),
            },
        })
        obj.put()
        from_user.deliver(obj, from_user=from_user)

        return blockee
2423

2424

2425
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Builds the object from the request and passes it to the
    :meth:`Protocol.receive` of its source protocol.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    params = request.form.to_dict()

    authed_as = params.pop('authed_as', None)
    received_at = params.pop('received_at', None)

    # requests authed as one of our own domains are treated as internal
    internal = authed_as == PRIMARY_DOMAIN or authed_as in PROTOCOL_DOMAINS

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    if received_at:
        received_at = datetime.fromisoformat(received_at)

    from_proto = PROTOCOLS[obj.source_protocol]
    try:
        return from_proto.receive(obj=obj, authed_as=authed_as,
                                  internal=internal, received_at=received_at)
    except RequestException as err:
        util.interpret_http_exception(err)
        error(err, status=304)
    except (RuntimeError, ValueError) as err:
        logger.warning(err, exc_info=True)
        error(err, status=304)
2468

2469

2470
@cloud_tasks_only(log=None)
def send_task():
    """Task handler that sends one activity to one specific destination.

    Calls :meth:`Protocol.send` with the form parameters. Responds 200 if the
    send returned truthy, 204 if it returned False (or if ``url``/``protocol``
    is missing, or memcache failed and the task was re-enqueued), and 304
    otherwise, eg when the send raised an HTTP-interpretable exception.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle
    """
    common.log_request()

    # prepare
    form = request.form.to_dict()
    url = form.get('url')
    protocol = form.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    # NOTE(review): target isn't used below. Constructing it presumably
    # validates protocol and url; confirm before removing.
    target = Target(uri=url, protocol=protocol)
    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    to_proto = PROTOCOLS[protocol]
    to_proto.check_supported(obj, 'send')
    allow_opt_out = (obj.type == 'delete')

    user = None
    user_key = form.get('user')
    if user_key:
        key = ndb.Key(urlsafe=user_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        user = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send
    delay = ''
    first_attempt = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if first_attempt and obj.created:
        elapsed = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(elapsed.total_seconds())
        delay = f'({delay_s} s behind)'
    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = to_proto.send(obj, url, from_user=user,
                             orig_obj_id=form.get('orig_obj_id'))
    except (MemcacheServerError, MemcacheUnexpectedCloseError,
            MemcacheUnknownError) as err:
        # our memorystore instance is probably undergoing maintenance. re-enqueue
        # task with a delay.
        # https://docs.cloud.google.com/memorystore/docs/memcached/about-maintenance
        report_error(f'memcache error on send task, re-enqueuing in {MEMCACHE_DOWN_TASK_DELAY}: {err}')
        common.create_task(queue='send', delay=MEMCACHE_DOWN_TASK_DELAY, **form)
        sent = False
    except BaseException as err:
        # only swallow exceptions that can be interpreted as HTTP errors
        code, body = util.interpret_http_exception(err)
        if not (code or body):
            raise

    if sent:
        status = 200
    elif sent is False:
        logger.info('Failed sending!')
        status = 204
    else:
        status = 304

    return '', status
2540

2541

2542
@cloud_tasks_only(log=None)
def user_enabled_task():
    r"""Task handler for when a user enables a protocol.

    DMs any dormant :class:`models.Follower`\s pointing at the user, from
    users on the enabled protocol, to let them know the user is now bridged,
    so they can follow them for real, and flips those ``Follower``\s from
    ``dormant`` to ``inactive``.

    Parameters:
      user (url-safe google.cloud.ndb.key.Key): the :class:`models.User` who
        enabled bridging
      protocol (str): ``LABEL`` of the protocol they enabled

    Raises:
      ErrorButDoNotRetryTask: if the user has a non-empty ``status``
    """
    common.log_request()

    proto = PROTOCOLS[request.form['protocol']]
    user = ndb.Key(urlsafe=request.form['user']).get()
    assert user
    logger.info(f'{user.key.id()} is {user.status or "ok"}')
    if user.status:
        raise ErrorButDoNotRetryTask()

    # Filter followers down to the enabled protocol *before* loading their
    # users, so the two lists stay index-aligned for zip() below. Filtering
    # only the keys would pair followers with the wrong users whenever a
    # dormant follower from a different protocol exists.
    kind = proto._get_kind()
    dormant = Follower.query(Follower.to == user.key,
                             Follower.status == 'dormant').fetch()
    followers = [f for f in dormant if f.from_.kind() == kind]
    from_users = ndb.get_multi([f.from_ for f in followers])

    for follower, from_user in zip(followers, from_users):
        # skip followers whose user is missing or has a non-empty status
        if from_user and not from_user.status:
            logger.info(f'Updating and DMing Follower from {from_user.key.id()}')
            follower.status = 'inactive'
            follower.put()

            relationship = {
                'bounce': ', who you originally followed before you Bounced,',
                'requested': ', who you asked to bridge,',
            }.get(follower.reason, '')
            dms.maybe_send(from_=proto, to_user=from_user, text=f'<p>Hi! {user.html_link(proto=proto, proto_fallback=True)}{relationship} has bridged their account into {proto.PHRASE}. You can follow them now if you want.')

    return '', 200
2582

2583

2584
@cloud_tasks_only(log=None)
def migrate_out_task():
    """Task handler that finishes a migration out of Bridgy Fed.

    When the destination protocol is ATProto, loads the auth entity and calls
    ``ATProto.migrate_out_blobs`` with it. For any other protocol, does
    nothing.

    Parameters:
      user (str, url-safe ndb.Key of a User): the bridged :class:`models.User`
        migrating out
      protocol (str): destination protocol
      auth (optional url-safe ndb.Key of an oauth-dropins auth entity): the user's
        new account. For ATProto, an :class:`oauth_dropins.bluesky.BlueskyAuth`.

    Raises:
      ErrorButDoNotRetryTask: if the user entity doesn't exist
    """
    # imported inside the function, as in the original
    from atproto import ATProto

    common.log_request()

    params = request.form
    migrating_user = ndb.Key(urlsafe=params['user']).get()
    if not migrating_user:
        raise ErrorButDoNotRetryTask()

    # only ATProto needs any follow-up work right now
    if params['protocol'] != ATProto.LABEL:
        return '', 200

    new_account = ndb.Key(urlsafe=params['auth']).get()
    assert new_account
    ATProto.migrate_out_blobs(migrating_user, new_account)

    return '', 200
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc