• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / a1d1d93d-7219-446d-b940-094647a15097

26 May 2026 03:18AM UTC coverage: 94.223% (+0.03%) from 94.192%
a1d1d93d-7219-446d-b940-094647a15097

push

circleci

snarfed
Protocol.handle_move: if user is bridged, move their copies to the new user

fixes #1442

14 of 15 new or added lines in 2 files covered. (93.33%)

22 existing lines in 1 file now uncovered.

7568 of 8032 relevant lines covered (94.22%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.9
/protocol.py
1
"""Base protocol class and common code."""
2
from bs4 import BeautifulSoup
1✔
3
import copy
1✔
4
from datetime import datetime, timedelta, timezone
1✔
5
import logging
1✔
6
import os
1✔
7
import re
1✔
8
from threading import Lock
1✔
9
from urllib.parse import urljoin, urlparse
1✔
10

11
from cachetools import cached, LRUCache
1✔
12
from flask import request
1✔
13
from google.cloud import ndb
1✔
14
from google.cloud.ndb import OR
1✔
15
from google.cloud.ndb.model import _entity_to_protobuf
1✔
16
from granary import as1, as2, source
1✔
17
from granary.source import HTML_ENTITY_RE, html_to_text
1✔
18
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
19
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
20
from oauth_dropins.webutil.models import MAX_ENTITY_SIZE
1✔
21
from oauth_dropins.webutil import util
1✔
22
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
23
from pymemcache.exceptions import (
1✔
24
    MemcacheServerError,
25
    MemcacheUnexpectedCloseError,
26
    MemcacheUnknownError,
27
)
28
from requests import RequestException
1✔
29
from websockets.exceptions import InvalidStatus
1✔
30
import werkzeug.exceptions
1✔
31
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException
1✔
32

33
import common
1✔
34
from common import (
1✔
35
    ErrorButDoNotRetryTask,
36
    report_error,
37
)
38
from domains import (
1✔
39
    DOMAINS,
40
    LOCAL_DOMAINS,
41
    PRIMARY_DOMAIN,
42
    PROTOCOL_DOMAINS,
43
    SUPERDOMAIN,
44
)
45
import dms
1✔
46
from domains import DOMAIN_BLOCKLIST
1✔
47
import ids
1✔
48
import memcache
1✔
49
from models import (
1✔
50
    Follower,
51
    get_original_user_key,
52
    load_user,
53
    Object,
54
    PROTOCOLS,
55
    PROTOCOLS_BY_KIND,
56
    Target,
57
    User,
58
)
59
import notifications
1✔
60

61
# Module-level tuning constants. Durations are timedeltas.
OBJECT_REFRESH_AGE = timedelta(days=30)
DELETE_TASK_DELAY = timedelta(minutes=1)
CREATE_MAX_AGE = timedelta(weeks=2)
# NOTE(review): presumably domains exempt from the CREATE_MAX_AGE check; not
# used in the visible part of this file - confirm against callers.
CREATE_MAX_AGE_EXEMPT_DOMAINS = (
    'alt.store',
)
# WARNING: keep this below the receive queue's min_backoff_seconds in queue.yaml!
MEMCACHE_LEASE_EXPIRATION = timedelta(seconds=25)
MEMCACHE_DOWN_TASK_DELAY = timedelta(minutes=5)
# WARNING: keep this in sync with queue.yaml's receive and webmention task_retry_limit!
TASK_RETRIES_RECEIVE = 4
# https://docs.cloud.google.com/tasks/docs/creating-appengine-handlers#reading-headers
TASK_RETRIES_HEADER = 'X-AppEngine-TaskRetryCount'

# require a follow for users on these domains before we deliver anything from
# them other than their profile
#
# the whitespace-separated LIMITED_DOMAINS environment variable takes
# precedence; if it's unset or empty, falls back to the lines of the
# limited_domains file
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
                   or util.load_file_lines('limited_domains'))

# domains to allow non-public activities from
NON_PUBLIC_DOMAINS = (
    # bridged from twitter (X). bird.makeup, kilogram.makeup, etc federate
    # tweets as followers-only, but they're public on twitter itself
    '.makeup',
)

DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | set((
    'accept',
    'reject',
    'stop-following',
    'undo',
))
# NOTE(review): ``-`` binds tighter than ``|`` in Python, so
# DONT_STORE_AS1_TYPES is only subtracted from as1.VERBS_WITH_OBJECT here, not
# from the whole union. That's equivalent as long as ACTOR_TYPES and POST_TYPES
# don't contain any of those types - confirm that's the intent.
STORE_AS1_TYPES = (as1.ACTOR_TYPES | as1.POST_TYPES | as1.VERBS_WITH_OBJECT
                   - DONT_STORE_AS1_TYPES)

# NOTE(review): presumably AS1 types that should never generate user
# notifications; not used in the visible part of this file - confirm.
DONT_NOTIFY_TYPES = (
    'block',
)

logger = logging.getLogger(__name__)
1✔
101

102

103
def error(*args, status=299, **kwargs):
    """Thin wrapper around :func:`common.error` with a different default status.

    The HTTP status code defaults to 299 here in order to prevent the current
    task from being retried. All other positional and keyword arguments are
    passed through untouched.

    Args:
      args: passed through to :func:`common.error`
      status (int): HTTP status code
      kwargs: passed through to :func:`common.error`

    Returns:
      whatever :func:`common.error` returns
    """
    # status is keyword-only, so it can never already be present in kwargs
    kwargs['status'] = status
    return common.error(*args, **kwargs)
1✔
106

107

108
def activity_id_memcache_key(id):
    """Returns the memcache key for a received activity's id.

    Args:
      id: activity id, interpolated into the key after a ``receive-`` prefix

    Returns:
      the result of :func:`memcache.key` for the prefixed id
    """
    prefixed = f'receive-{id}'
    return memcache.key(prefixed)
1✔
110

111

112
class Protocol:
1✔
113
    """Base protocol class. Not to be instantiated; classmethods only."""
114
    ABBREV = None
1✔
115
    'str: lower case abbreviation, used in URL paths'
1✔
116
    PHRASE = None
1✔
117
    'str: human-readable name or phrase. Used in phrases like ``Follow this person on {PHRASE}``'
1✔
118
    OTHER_LABELS = ()
1✔
119
    'sequence of str: label aliases'
1✔
120
    LOGO_EMOJI = ''
1✔
121
    'str: logo emoji, if any'
1✔
122
    LOGO_HTML = ''
1✔
123
    'str: logo ``<img>`` tag, if any'
1✔
124
    CONTENT_TYPE = None
1✔
125
    "str: MIME type of this protocol's native data format, appropriate for the ``Content-Type`` HTTP header."
1✔
126
    HAS_COPIES = False
1✔
127
    'bool: whether this protocol is push and needs us to proactively create "copy" users and objects, as opposed to pulling converted objects on demand'
1✔
128
    DEFAULT_TARGET = None
1✔
129
    'str: optional, the default target URI to send this protocol\'s activities to. May be used as the "shared" target. Often only set if ``HAS_COPIES`` is true.'
1✔
130
    REQUIRES_AVATAR = False
1✔
131
    "bool: whether accounts on this protocol are required to have a profile picture. If they don't, their ``User.status`` will be ``blocked``."
1✔
132
    REQUIRES_NAME = False
1✔
133
    "bool: whether accounts on this protocol are required to have a profile name that's different than their handle or id. If they don't, their ``User.status`` will be ``blocked``."
1✔
134
    REQUIRES_OLD_ACCOUNT = False
1✔
135
    "bool: whether accounts on this protocol are required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their profile includes creation date and it's not old enough, their ``User.status`` will be ``blocked``."
1✔
136
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
137
    'sequence of str: labels of other protocols that are automatically enabled for this protocol to bridge into'
1✔
138
    DEFAULT_SERVE_USER_PAGES = False
1✔
139
    "bool: whether to serve user pages for all of this protocol's users on the fed.brid.gy. If ``False``, user pages will only be served for users who have explictly opted in."
1✔
140
    SUPPORTED_AS1_TYPES = ()
1✔
141
    'sequence of str: AS1 objectTypes and verbs that this protocol supports receiving and sending'
1✔
142
    SUPPORTS_DMS = False
1✔
143
    'bool: whether this protocol can receive DMs (chat messages)'
1✔
144
    USES_OBJECT_FEED = False
1✔
145
    'bool: whether to store followers on this protocol in :attr:`Object.feed`.'
1✔
146
    HTML_PROFILES = False
1✔
147
    'bool: whether this protocol supports HTML in profile descriptions. If False, profile descriptions should be plain text.'
1✔
148
    SEND_REPLIES_TO_ORIG_POSTS_MENTIONS = False
1✔
149
    "bool: whether replies to this protocol should include the original post's mentions as delivery targets"
1✔
150
    BOTS_FOLLOW_BACK = False
1✔
151
    'bool: when a user on this protocol follows a bot user to enable bridging, does the bot follow them back?'
1✔
152
    HANDLES_PER_PAY_LEVEL_DOMAIN = None
1✔
153
    'int: how many users to allow with handles on the same pay-level domain. None for no limit.'
1✔
154
    RECEIVE_FILTERS = ()
1✔
155
    'tuple of callable: filter functions from filters.py to apply to incoming activities. Applied in order, so put the cheapest filters first.'
1✔
156
    RATE_LIMIT_TYPE = memcache.RateLimitType.LINEAR
1✔
157
    'Whether receive and send task rate limiting increases linearly or exponential.'
1✔
158

159
    @classmethod
1✔
160
    @property
1✔
161
    def LABEL(cls):
1✔
162
        """str: human-readable lower case name of this protocol, eg ``'activitypub``"""
163
        return cls.__name__.lower()
1✔
164

165
    @staticmethod
    def for_request(fed=None):
        """Returns the protocol for the current Flask request.

        Looks at the request's ``Host`` and delegates to
        :meth:`for_bridgy_subdomain`.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request's hostname is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        hostname = request.host
        return Protocol.for_bridgy_subdomain(hostname, fed=fed)
1✔
180

181
    @staticmethod
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Maps a brid.gy (sub)domain, or a URL on one, to its protocol.

        Args:
          domain_or_url (str): bare domain, or web URL whose domain is used
          fed (str or protocol.Protocol): protocol, or protocol label, to return
            if the domain is the primary domain (``fed.brid.gy``) or a local
            domain

        Returns:
          class: :class:`Protocol` subclass, or None if the domain is not a
          subdomain of ``brid.gy`` or its subdomain label isn't a known protocol
        """
        # URLs are reduced to their full, non-minimized domain. anything else is
        # treated as a domain already.
        domain = domain_or_url
        if util.is_web(domain_or_url):
            domain = util.domain_from_link(domain_or_url, minimize=False)

        if domain == PRIMARY_DOMAIN or domain in LOCAL_DOMAINS:
            # string labels are looked up; protocol classes (or None) pass through
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        if domain and domain.endswith(SUPERDOMAIN):
            return PROTOCOLS.get(domain.removesuffix(SUPERDOMAIN))

        return None
1✔
204

205
    @classmethod
    def owns_id(cls, id):
        """Returns whether this protocol owns the id, or None if it's unclear.

        To be implemented by subclasses. This base implementation always
        returns False.

        IDs are string identities that uniquely identify users or objects, and
        are intended primarily to be machine readable and usable. Compare to
        handles, which are human-chosen, human-meaningful, and often but not
        always unique.

        Some protocols' ids are more or less deterministic based on the id
        format, eg AT Protocol owns ``at://`` URIs and DIDs. Others, like
        http(s) URLs, could be owned by eg Web or ActivityPub.

        This should be a quick guess without expensive side effects, eg no
        external HTTP fetches to fetch the id itself or otherwise perform
        discovery.

        Implementations should return False if the id's domain is in
        :const:`domains.DOMAIN_BLOCKLIST`.

        Args:
          id (str): user id or object id

        Returns:
          bool or None: True if this protocol definitely owns the id, False if
          it definitely doesn't, None if it might
        """
        return False
1✔
233

234
    @classmethod
    def owns_handle(cls, handle, allow_internal=False):
        """Returns whether this protocol owns the handle, or None if it's unclear.

        To be implemented by subclasses. This base implementation always
        returns False and ignores ``allow_internal``.

        Handles are string identities that are human-chosen, human-meaningful,
        and often but not always unique. Compare to IDs, which uniquely identify
        users, and are intended primarily to be machine readable and usable.

        Some protocols' handles are more or less deterministic based on the id
        format, eg ActivityPub (technically WebFinger) handles are
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
        ActivityPub, AT Protocol, or others.

        This should be a quick guess without expensive side effects, eg no
        external HTTP fetches to fetch the id itself or otherwise perform
        discovery.

        Args:
          handle (str)
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc
            (NOTE(review): the polarity of this flag isn't visible here;
            confirm against the subclass implementations)

        Returns:
          bool or None: True if this protocol definitely owns the handle, False
          if it definitely doesn't, None if it might
        """
        return False
1✔
262

263
    @classmethod
    def handle_to_id(cls, handle):
        """Converts a handle to an id.

        To be implemented by subclasses. This base implementation is abstract.

        May incur network requests, eg DNS queries or HTTP requests. Avoids
        blocked or opted out users.

        Args:
          handle (str)

        Returns:
          str: corresponding id, or None if the handle can't be found

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
1✔
279

280
    @classmethod
    def authed_user_for_request(cls):
        """Returns the authenticated user id for the current request.

        Checks authentication on the current request, eg HTTP Signature for
        ActivityPub. To be implemented by subclasses. This base implementation
        performs no checks and always returns None.

        Returns:
          str: authenticated user id, or None if there is no authentication

        Raises:
          RuntimeError: if the request's authentication (eg signature) is
          invalid or otherwise can't be verified
        """
        return None
1✔
296

297
    @classmethod
    def key_for(cls, id, allow_opt_out=False):
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

        When called on :class:`Protocol` itself, the concrete protocol is
        inferred with :meth:`for_id` and the call is forwarded to it. When
        called on a concrete subclass, that subclass is used as is.

        Args:
          id (str):
          allow_opt_out (bool): whether to allow users who are currently opted out

        Returns:
          google.cloud.ndb.Key: matching key, or None if the given id is not a
          valid :class:`User` id for this protocol, or if the existing user has
          a ``status`` set and ``allow_opt_out`` is False
        """
        if cls == Protocol:
            inferred = Protocol.for_id(id)
            if not inferred:
                return None
            return inferred.key_for(id, allow_opt_out=allow_opt_out)

        # load user so that we follow use_instead
        stored = cls.get_by_id(id, allow_opt_out=True)
        if not stored:
            # no stored user; build the key from an unsaved entity
            return cls(id=id).key

        if stored.status and not allow_opt_out:
            return None

        return stored.key
1✔
325

326
    @staticmethod
    def _for_id_memcache_key(id, remote=None):
        """Generates the memcache key for :meth:`for_id`'s memoization.

        Args:
          id (str)
          remote (bool): the ``remote`` argument that :meth:`for_id` was
            called with, if any

        Returns:
          str or None: ``id`` itself if its domain is one of
          :const:`domains.PROTOCOL_DOMAINS`; otherwise the id's domain if
          ``remote`` is truthy and ``id`` is a web URL; otherwise None
          (NOTE(review): presumably None means "don't cache" - confirm against
          ``memcache.memoize``)
        """
        domain = util.domain_from_link(id)

        if domain in PROTOCOL_DOMAINS:
            return id

        if remote and util.is_web(id):
            return domain

        return None
1✔
341

342
    # results are cached twice: in process, in a 20000-entry LRU cache guarded
    # by a lock, and in memcache, keyed by _for_id_memcache_key.
    # NOTE(review): the ``write`` callback presumably means results are only
    # written to memcache when ``remote`` is truthy - confirm against
    # memcache.memoize.
    @cached(LRUCache(20000), lock=Lock())
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote=True: remote,
                      version=3)
    @staticmethod
    def for_id(id, remote=True):
        """Returns the protocol for a given id.

        Tries progressively more expensive strategies, in order:

        1. if the id is a web URL on one of our per-protocol subdomains, use
           that subdomain's protocol
        2. ask every known protocol's :meth:`owns_id`
        3. look for an existing stored object with a ``source_protocol``
        4. if ``remote`` is true, try loading the id over the network with each
           protocol that said it *might* own the id

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.debug(f'Determining protocol for id {id}')
        if not id:
            return None

        # remove our synthetic id fragment, if any
        #
        # will this eventually cause false positives for other services that
        # include our full ids inside their own ids, non-URL-encoded? guess
        # we'll figure that out if/when it happens.
        id = id.partition('#bridgy-fed-')[0]
        if not id:
            return None

        if util.is_web(id):
            # step 1: check for our per-protocol subdomains
            try:
                parsed = urlparse(id)
            except ValueError as e:
                logger.info(f'urlparse ValueError: {e}')
                return None

            # homepages, internal paths, and bot actor ids on our subdomains
            # aren't bridged ids, so they fall through to the steps below
            is_internal = parsed.path.startswith(ids.INTERNAL_PATH_PREFIX)
            by_subdomain = Protocol.for_bridgy_subdomain(id)
            if by_subdomain and not (util.is_homepage(id) or is_internal
                                     or id in ids.BOT_ACTOR_AP_IDS):
                logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
                return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it
        # sort to be deterministic
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
                           key=lambda p: p.LABEL)
        candidates = []
        for protocol in protocols:
            owns = protocol.owns_id(id)
            if owns:
                logger.debug(f'  {protocol.LABEL} owns id {id}')
                return protocol
            elif owns is not False:
                # None (or any other falsy non-False value) means "maybe"
                candidates.append(protocol)

        if len(candidates) == 1:
            logger.debug(f'  {candidates[0].LABEL} owns id {id}')
            return candidates[0]

        # step 3: look for existing Objects in the datastore
        #
        # note that we don't currently see if this is a copy id because I have FUD
        # over which Protocol for_id should return in that case...and also because a
        # protocol may already say definitively above that it owns the id, eg ATProto
        # with DIDs and at:// URIs.
        obj = Protocol.load(id, remote=False)
        if obj and obj.source_protocol:
            logger.debug(f'  {obj.key.id()} owned by source_protocol {obj.source_protocol}')
            return PROTOCOLS[obj.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for protocol in candidates:
            logger.debug(f'Trying {protocol.LABEL}')
            try:
                obj = protocol.load(id, local=False, remote=True)

                if protocol.ABBREV == 'web':
                    # for web, if we fetch and get HTML without microformats,
                    # load returns False but the object will be stored in the
                    # datastore with source_protocol web, and in cache. load it
                    # again manually to check for that.
                    obj = Object.get_by_id(id)
                    if obj and obj.source_protocol != 'web':
                        obj = None

                if obj:
                    logger.debug(f'  {protocol.LABEL} owns id {id}')
                    return protocol
            # BadGateway has to be handled before its base class HTTPException
            except BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except HTTPException as e:
                # internal error we generated ourselves; try next protocol
                pass
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
1✔
451

452
    @cached(LRUCache(20000), lock=Lock())
    @staticmethod
    def for_handle(handle):
        """Determines which protocol a handle belongs to.

        May incur expensive side effects like resolving the handle itself over
        the network or other discovery. Results are cached in process in an LRU
        cache.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle,
          or if the handle matches a stored user whose ``status`` is set
        """
        # TODO: normalize, eg convert domains to lower case
        logger.debug(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: check if any Protocols say conclusively that they own it.
        # sort to be deterministic.
        distinct = set(p for p in PROTOCOLS.values() if p)
        candidates = []
        for proto in sorted(distinct, key=lambda p: p.LABEL):
            verdict = proto.owns_handle(handle)
            if verdict:
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)

            if verdict is not False:
                # not a definite no, so it's a maybe
                candidates.append(proto)

        if len(candidates) == 1:
            logger.debug(f'  {candidates[0].LABEL} owns handle {handle}')
            return (candidates[0], None)

        # step 2: look for matching User in the datastore
        for proto in candidates:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue

            if user.status:
                return (None, None)

            logger.debug(f'  user {user.key} handle {handle}')
            return (proto, user.key.id())

        # step 3: resolve handle to id
        for proto in candidates:
            id = proto.handle_to_id(handle)
            if not id:
                continue

            logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {id}')
            return (proto, id)

        logger.info(f'No matching protocol found for handle {handle} !')
        return (None, None)
1✔
507

508
    @classmethod
1✔
509
    def is_user_at_domain(cls, handle, allow_internal=False):
1✔
510
        """Returns True if handle is formatted ``user@domain.tld``, False otherwise.
511

512
        Example: ``@user@instance.com``
513

514
        Args:
515
          handle (str)
516
          allow_internal (bool): whether the domain can be a Bridgy Fed domain
517
        """
518
        parts = handle.split('@')
1✔
519
        if len(parts) != 2:
1✔
520
            return False
1✔
521

522
        user, domain = parts
1✔
523
        return bool(user and domain
1✔
524
                    and not cls.is_blocklisted(domain, allow_internal=allow_internal))
525

526
    @classmethod
1✔
527
    def bridged_web_url_for(cls, user, fallback=False):
1✔
528
        """Returns the web URL for a user's bridged profile in this protocol.
529

530
        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
531
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``
532

533
        Args:
534
          user (models.User)
535
          fallback (bool): if True, and bridged users have no canonical user
536
            profile URL in this protocol, return the native protocol's profile URL
537

538
        Returns:
539
          str, or None if there isn't a canonical URL
540
        """
541
        if fallback:
1✔
542
            return user.web_url()
1✔
543

544
    @classmethod
    def actor_key(cls, obj, allow_opt_out=False):
        """Returns the :class:`User` key for a given object's author or actor.

        The owner is extracted from the object's AS1 with ``as1.get_owner``
        and converted to a key with :meth:`key_for`.

        Args:
          obj (models.Object)
          allow_opt_out (bool): whether to return a user key if they're opted out

        Returns:
          google.cloud.ndb.key.Key or None: None if the object has no owner, or
          if :meth:`key_for` returns None
        """
        owner = as1.get_owner(obj.as1)
        if not owner:
            return None

        return cls.key_for(owner, allow_opt_out=allow_opt_out)
1✔
558

559
    @classmethod
    def bot_user_id(cls):
        """Returns the Web user id for the bot user for this protocol.

        For example, ``'bsky.brid.gy'`` for ATProto.

        Built by appending :const:`domains.SUPERDOMAIN` to this protocol's
        :attr:`ABBREV`.

        Returns:
          str:
        """
        return f'{cls.ABBREV}{SUPERDOMAIN}'
1✔
569

570
    @classmethod
    def create_for(cls, user):
        """Creates or re-activates a copy user in this protocol.

        To be implemented by subclasses. This base implementation is abstract.

        Should add the copy user to :attr:`copies`.

        If the copy user already exists and is active, should do nothing.

        Args:
          user (models.User): original source user. Shouldn't already have a
            copy user for this protocol in :attr:`copies`.

        Raises:
          ValueError: if we can't create a copy of the given user in this protocol
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
×
586

587
    @classmethod
    def send(to_cls, obj, target, from_user=None, orig_obj_id=None):
        """Sends an outgoing activity.

        To be implemented by subclasses. This base implementation is abstract.
        Implementations should call ``to_cls.translate_ids(obj.as1)`` before
        converting it to this Protocol's format.

        NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates a
        copy and sends it, it *must* add that copy to the *object*'s (not activity's)
        :attr:`copies`, and store it back in the datastore, *in a transaction*!

        Args:
          to_cls (Protocol subclass): the class this is called on, ie the
            protocol being sent to
          obj (models.Object): with activity to send
          target (str): destination URL to send to
          from_user (models.User): user (actor) this activity is from
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
            that this object refers to, eg replies to or reposts or likes

        Returns:
          bool: True if the activity is sent successfully, False if it is
          ignored or otherwise unsent due to protocol logic, eg no webmention
          endpoint, protocol doesn't support the activity type. (Failures are
          raised as exceptions.)

        Raises:
          werkzeug.HTTPException: if the request fails
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
×
616

617
    @classmethod
    def fetch(cls, obj, **kwargs):
        """Fetches a protocol-specific object and populates it in an :class:`Object`.

        Errors are raised as exceptions. If this method returns False, the fetch
        didn't fail but didn't succeed either, eg the id isn't valid for this
        protocol, or the fetch didn't return valid data for this protocol.

        To be implemented by subclasses. This base implementation is abstract.

        Args:
          obj (models.Object): with the id to fetch. Data is filled into one of
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
          kwargs: subclass-specific

        Returns:
          bool: True if the object was fetched and populated successfully,
          False otherwise

        Raises:
          requests.RequestException, werkzeug.HTTPException,
          websockets.WebSocketException, etc: if the fetch fails
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
×
641

642
    @classmethod
    def convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
        and ``application/activity+json`` for :class:`ActivityPub`.

        Mostly passes through to :meth:`_convert`. Before that, if ``obj`` is
        ``from_user``'s profile and is being bridged into a different protocol,
        temporarily rewrites ``obj.our_as1`` to add the "bridged by" source
        links and, for web users who haven't explicitly opted in, an
        ``[Unofficial]`` display name suffix.

        ``obj.our_as1`` is always restored to its original value before this
        method returns or raises, so callers never see the temporary edits.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific, passed through to :meth:`_convert`

        Returns:
          converted object in the protocol's native format, often a dict.
          ``{}`` if ``obj`` is empty or has no AS1.
        """
        if not obj or not obj.as1:
            return {}

        id = obj.key.id() if obj.key else obj.as1.get('id')
        is_crud = obj.as1.get('verb') in as1.CRUD_VERBS
        orig_our_as1 = obj.our_as1

        try:
            # post-processing for user profiles
            #
            # NOTE(review): for_bridgy_subdomain returns a Protocol class or
            # None, while DOMAINS looks like a collection of domain strings. if
            # so, the last condition below is always true - confirm the intent.
            if (from_user and from_user.is_profile(obj)
                    and PROTOCOLS.get(obj.source_protocol) != cls
                    and Protocol.for_bridgy_subdomain(id) not in DOMAINS):
                # TODO: more systematic way to get this that covers all protocols,
                # eg Nostr NIP-05
                web_opted_in = (from_user.LABEL == 'web' and
                                (from_user.last_webmention_in
                                 or from_user.has_redirects
                                 or from_user.handle_as('atproto') == from_user.key.id()))
                if not web_opted_in:
                    # mark bridged actors as bots and add "bridged by Bridgy Fed" to
                    # their bios. (web users are special cased, they don't get the
                    # label if they've explicitly enabled Bridgy Fed with redirects
                    # or webmentions.)
                    cls.add_source_links(obj=obj, from_user=from_user)

                    # web is currently opt out, so add [Unofficial] to their display
                    # name to be explicit that they may not have enabled this
                    # themselves
                    if from_user.LABEL == 'web':
                        if obj.our_as1 is orig_our_as1:
                            # never edit the caller's original our_as1 in place
                            obj.our_as1 = copy.deepcopy(obj.as1)
                        actor = as1.get_object(obj.our_as1) if is_crud else obj.our_as1
                        if ((name := actor.get('displayName'))
                                and not name.endswith(' [Unofficial]')):
                            actor['displayName'] = f'{name} [Unofficial]'

            return cls._convert(obj, from_user=from_user, **kwargs)
        finally:
            # undo our temporary profile edits, even if post-processing or
            # conversion raised
            obj.our_as1 = orig_our_as1
1✔
698

699
    @classmethod
    def _convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        To be implemented by subclasses. This base implementation is abstract.
        Implementations should generally call :meth:`Protocol.translate_ids`
        (as their own class) before converting to their format.

        Called by :meth:`convert`, which is the public entry point.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific

        Returns:
          converted object in the protocol's native format, often a dict. May
            return the ``{}`` empty dict if the object can't be converted.

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
×
717

718
    @classmethod
    def add_source_links(cls, obj, from_user):
        """Appends a "bridged from ... by Bridgy Fed" note to an actor's ``summary``.

        The note is HTML when ``cls.HTML_PROFILES`` is set, plain text
        otherwise. ``obj.our_as1`` is replaced with a deep copy of ``obj.as1``
        before the actor is modified. If the summary text already contains
        ``🌉 bridged``, the summary is left alone.

        Args:
          cls (Protocol subclass): protocol that the user is bridging into
          obj (models.Object): user's actor/profile object
          from_user (models.User): user (actor) this activity/object is from
        """
        assert obj and obj.as1
        assert from_user

        obj.our_as1 = copy.deepcopy(obj.as1)
        # for CRUD activities, the actor to annotate is the inner object
        if obj.type in as1.CRUD_VERBS:
            actor = as1.get_object(obj.as1)
        else:
            actor = obj.as1
        actor.setdefault('objectType', 'person')

        orig_summary = actor.setdefault('summary', '')
        summary_text = html_to_text(orig_summary, ignore_links=True)

        # nothing to do if the source links are already there
        if '🌉 bridged' in summary_text:
            return

        actor_id = actor.get('id')

        url = as1.get_url(actor)
        if not url:
            if from_user.profile_id() == actor_id:
                url = from_user.web_url()
            else:
                url = actor_id

        from web import Web
        bot_user = Web.get_by_id(from_user.bot_user_id())

        if cls.HTML_PROFILES:
            separator = '<br><br>'

            if bot_user and from_user.LABEL not in cls.DEFAULT_ENABLED_PROTOCOLS:
                mention = bot_user.html_link(proto=cls, name=False, handle='short')
                suffix = f', follow {mention} to interact'
            else:
                suffix = f' by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>'

            is_user = from_user.key and actor_id in (from_user.key.id(),
                                                     from_user.profile_id())
            if is_user:
                bridged = f'🌉 <a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a>'
                from_ = f'<a href="{from_user.web_url()}">{from_user.handle}</a>'
            else:
                bridged = '🌉 bridged'
                from_ = util.pretty_link(url) if url else '?'

        else:  # plain text
            # TODO: unify with above. which is right?
            obj_id = obj.key.id() if obj.key else obj.our_as1.get('id')
            is_user = from_user.key and obj_id in (from_user.key.id(),
                                                   from_user.profile_id())
            from_ = (from_user.web_url() if is_user else url) or '?'

            bridged = '🌉 bridged'
            if from_user.LABEL == 'web':
                # link web users to their user pages
                suffix = f': https://{PRIMARY_DOMAIN}{from_user.user_page_path()}'
            elif bot_user and from_user.LABEL not in cls.DEFAULT_ENABLED_PROTOCOLS:
                suffix = f', follow @{bot_user.handle_as(cls)} to interact'
            else:
                suffix = f' by https://{PRIMARY_DOMAIN}/'

            separator = '\n\n'
            # plain text protocols get the text-only rendering of the summary
            orig_summary = summary_text

        logo = f'{from_user.LOGO_EMOJI} ' if from_user.LOGO_EMOJI else ''
        lead = separator if orig_summary else ""
        source_links = f'{lead}{bridged} from {logo}{from_}{suffix}'
        actor['summary'] = orig_summary + source_links
1✔
792

793
    @classmethod
    def set_username(to_cls, user, username):
        """Sets a custom username for a user's bridged account in this protocol.

        This base implementation only raises :class:`NotImplementedError`;
        subclasses that support custom usernames override it.

        Args:
          user (models.User)
          username (str)

        Raises:
          ValueError: if the username is invalid
          RuntimeError: if the username could not be set
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError
1✔
806

807
    @classmethod
    def migrate_out(cls, user, to_user_id):
        """Migrates a bridged account out to be a native account.

        This base implementation only raises :class:`NotImplementedError`;
        subclasses provide the real behavior.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: eg if this protocol doesn't own ``to_user_id``, or if
            ``user`` is on this protocol or not bridged to this protocol
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError
×
820

821
    @classmethod
    def check_can_migrate_out(cls, user, to_user_id):
        """Raises an exception if a user can't yet migrate to a native account.

        The checks, in order: ``to_user_id`` must not be rejected by
        :meth:`owns_id`, ``user`` must not be an instance of this protocol,
        and ``user`` must be enabled for this protocol.

        If the user is ready to migrate, returns ``None``.

        Subclasses may override this to add more criteria, but they should call
        this implementation first.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: if ``user`` isn't ready to migrate to this protocol yet
        """
        if cls.owns_id(to_user_id) is False:
            problem = f"{to_user_id} doesn't look like an {cls.LABEL} id"
        elif isinstance(user, cls):
            problem = f"{user.handle_or_id()} is on {cls.PHRASE}"
        elif not user.is_enabled(cls):
            problem = f"{user.handle_or_id()} isn't currently bridged to {cls.PHRASE}"
        else:
            # all checks passed
            return

        logger.warning(problem)
        raise ValueError(problem)
1✔
850

851
    @classmethod
    def migrate_in(cls, user, from_user_id, **kwargs):
        """Migrates a native account in to be a bridged account.

        The protocol independent parts are done here; protocol-specific parts are
        done in :meth:`_migrate_in`, which this wraps.

        Reloads the user's profile before calling :meth:`_migrate_in`. The
        reload is best effort: if it fails with a ``RequestException`` or
        ``HTTPException``, the failure is logged and the migration continues.

        After :meth:`_migrate_in` returns, this protocol's label is added to
        the user's ``enabled_protocols`` and the user is stored. If the user
        has a profile object, a ``receive`` task is created for it; when
        ``cls.HAS_COPIES`` is set, its copy on this protocol is first replaced
        with the migrated-in account's profile id.

        Args:
          user (models.User): native user on another protocol to attach the
            newly imported bridged account to
          from_user_id (str)
          kwargs: additional protocol-specific parameters

        Raises:
          ValueError: eg if this protocol doesn't own ``from_user_id``, or if
            ``user`` is on this protocol or already bridged to this protocol
        """
        def _error(msg):
            logger.warning(msg)
            raise ValueError(msg)

        logger.info(f"Migrating in {from_user_id} for {user.key.id()}")

        # check req'ts
        if cls.owns_id(from_user_id) is False:
            _error(f"{from_user_id} doesn't look like an {cls.LABEL} id")
        elif isinstance(user, cls):
            _error(f"{user.handle_or_id()} is on {cls.PHRASE}")
        elif cls.HAS_COPIES and cls.LABEL in user.enabled_protocols:
            _error(f"{user.handle_or_id()} is already bridged to {cls.PHRASE}")

        # reload profile. best effort: a failed fetch doesn't block the
        # migration, but log it so that the failure isn't silently lost.
        try:
            user.reload_profile()
        except (RequestException, HTTPException) as e:
            _, msg = util.interpret_http_exception(e)
            logger.warning(f"Couldn't reload profile for {user.key.id()}: {msg or e}")

        # migrate!
        cls._migrate_in(user, from_user_id, **kwargs)
        user.add('enabled_protocols', cls.LABEL)
        user.put()

        # attach profile object
        if user.obj:
            if cls.HAS_COPIES:
                # point the profile's copy on this protocol at the account
                # that was just migrated in
                profile_id = ids.profile_id(id=from_user_id, proto=cls)
                user.obj.remove_copies_on(cls)
                user.obj.add('copies', Target(uri=profile_id, protocol=cls.LABEL))
                user.obj.put()

            common.create_task(queue='receive', obj_id=user.obj_key.id(),
                               authed_as=user.key.id())
905

906
    @classmethod
1✔
907
    def _migrate_in(cls, user, from_user_id, **kwargs):
1✔
908
        """Protocol-specific parts of migrating in external account.
909

910
        Called by :meth:`migrate_in`, which does most of the work, including calling
911
        :meth:`reload_profile` before this.
912

913
        Args:
914
          user (models.User): native user on another protocol to attach the
915
            newly imported account to. Unused.
916
          from_user_id (str): DID of the account to be migrated in
917
          kwargs: protocol dependent
918
        """
919
        raise NotImplementedError()
×
920

921
    @classmethod
    def target_for(cls, obj, shared=False):
        """Returns an :class:`Object`'s delivery target (endpoint).

        To be implemented by subclasses; this base implementation only raises
        :class:`NotImplementedError`.

        Examples:

        * If obj has ``source_protocol`` ``web``, returns its URL, as a
          webmention target.
        * If obj is an ``activitypub`` actor, returns its inbox.
        * If obj is an ``activitypub`` object, returns its author's or actor's
          inbox.

        Args:
          obj (models.Object):
          shared (bool): optional. If True, returns a common/shared
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
            multiple recipients for efficiency

        Returns:
          str: target endpoint, or None if not available.

        Raises:
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError
×
945

946
    @classmethod
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        Default implementation here, subclasses may override.

        The blocklist always includes ``DOMAIN_BLOCKLIST``. Reserved and local
        TLDs are added unless ``DEBUG`` is set, and our own ``DOMAINS`` are
        added unless ``allow_internal`` is True.

        Args:
          url (str):
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          bool: whether ``url``'s domain, or one of its parent domains, is in
            the blocklist
        """
        # copy into a fresh tuple so that the augmented assignments below can
        # never extend the module-level DOMAIN_BLOCKLIST in place, eg if it's
        # ever defined as a list instead of a tuple
        blocklist = tuple(DOMAIN_BLOCKLIST)
        if not DEBUG:
            blocklist += tuple(util.RESERVED_TLDS | util.LOCAL_TLDS)
        if not allow_internal:
            blocklist += tuple(DOMAINS)
        return util.domain_or_parent_in(url, blocklist)
1✔
963

964
    @classmethod
    def translate_ids(to_cls, obj):
        """Translates all ids in an AS1 object to a specific protocol.

        Infers source protocol for each id value separately.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``bcc``
        * ``bto``
        * ``cc``
        * ``featured[].items``, ``featured[].orderedItems``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``object.object``
        * ``attachments[].id``
        * ``tags[objectType=mention].url``
        * ``to``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        The input is deep copied first, so ``obj`` itself is not modified.
        Mention handles in the content are also translated, via
        :meth:`translate_mention_handles`.

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        Args:
          to_cls (Protocol subclass): protocol to translate the ids into. Must
            not be :class:`Protocol` itself.
          obj (dict): AS1 object or activity (not :class:`models.Object`!)

        Returns:
          dict: translated AS1 version of ``obj``. If ``obj`` is empty or None,
            it's returned unchanged.
        """
        from ui import UIProtocol

        assert to_cls != Protocol
        if not obj:
            return obj

        outer_obj = to_cls.translate_mention_handles(copy.deepcopy(obj))
        # normalize the inner object(s) to a list so they can be translated
        # uniformly; collapsed back down at the end
        inner_objs = outer_obj['object'] = as1.get_objects(outer_obj)

        def translate(elem, field, fn, uri=False):
            # Translates the id(s) in elem[field], in place, using fn, which is
            # called with id, from_, and to kwargs. If uri is True, the result
            # is then converted with to_cls's id_uri().
            owner_id = as1.get_owner(elem)
            owner_proto = Protocol.for_id(owner_id)

            elem[field] = as1.get_objects(elem, field)
            for obj in elem[field]:
                if id := obj.get('id'):
                    # audience values in recipient fields are left as is
                    if field in ('to', 'cc', 'bcc', 'bto') and as1.is_audience(id):
                        continue

                    from_cls = Protocol.for_id(id)
                    # for ids that look like UIProtocol's, translate from the
                    # owner's protocol instead, if we know it
                    if field == 'id' and from_cls == UIProtocol and owner_proto:
                        logger.info(f'owner of {id} {owner_id} is {owner_proto.LABEL}, translating id from that protocol')
                        from_cls = owner_proto

                    # TODO: what if from_cls is None? relax translate_object_id,
                    # make it a noop if we don't know enough about from/to?
                    if from_cls and from_cls != to_cls:
                        obj['id'] = fn(id=id, from_=from_cls, to=to_cls)
                    if uri:
                        obj['id'] = to_cls(id=obj['id']).id_uri() if obj['id'] else id

            # collapse objects that only have an id down to the bare id string
            elem[field] = [o['id'] if o.keys() == {'id'} else o
                           for o in elem[field]]

            # unwrap single-element lists, except for collection item fields,
            # which stay lists
            if len(elem[field]) == 1 and field not in ('items', 'orderedItems'):
                elem[field] = elem[field][0]

        type = as1.object_type(outer_obj)
        translate(outer_obj, 'id',
                  ids.translate_user_id if type in as1.ACTOR_TYPES
                  else ids.translate_object_id)

        for o in inner_objs:
            # the inner object's id is treated as a user id if it's an actor, if
            # it's the outer object's owner, or if the outer verb is one of
            # follow, stop-following, or block
            is_actor = (as1.object_type(o) in as1.ACTOR_TYPES
                        or as1.get_owner(outer_obj) == o.get('id')
                        or type in ('follow', 'stop-following', 'block'))
            translate(o, 'id', (ids.translate_user_id if is_actor
                                else ids.translate_object_id))
            # TODO: need to handle both user and object ids here
            # https://github.com/snarfed/bridgy-fed/issues/2281
            obj_is_actor = o.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT
            translate(o, 'object', (ids.translate_user_id if obj_is_actor
                                    else ids.translate_object_id))

        for o in [outer_obj] + inner_objs:
            translate(o, 'inReplyTo', ids.translate_object_id)
            for field in 'actor', 'author', 'to', 'cc', 'bto', 'bcc':
                translate(o, field, ids.translate_user_id)
            for tag in as1.get_objects(o, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', ids.translate_user_id, uri=True)
            for att in as1.get_objects(o, 'attachments'):
                translate(att, 'id', ids.translate_object_id)
                # attachments with a url but no id get an id translated from
                # the url, if we can determine the url's protocol
                url = att.get('url')
                if url and not att.get('id'):
                    if from_cls := Protocol.for_id(url):
                        att['id'] = ids.translate_object_id(from_=from_cls, to=to_cls,
                                                            id=url)
            if feat := as1.get_object(o, 'featured'):
                translate(feat, 'orderedItems', ids.translate_object_id)
                translate(feat, 'items', ids.translate_object_id)

        outer_obj = util.trim_nulls(outer_obj)

        # collapse the inner object(s) back down: id-only dicts to bare ids,
        # single-element list to its one element
        if objs := util.get_list(outer_obj ,'object'):
            outer_obj['object'] = [o['id'] if o.keys() == {'id'} else o for o in objs]
            if len(outer_obj['object']) == 1:
                outer_obj['object'] = outer_obj['object'][0]

        return outer_obj
1✔
1086

1087
    @classmethod
    def translate_mention_handles(cls, obj):
        """Translates @-mentions in ``obj.content`` to this protocol's handles.

        Specifically, for each ``mention`` tag in the object's tags that has
        ``startIndex`` and ``length``, replaces it in ``obj.content`` with that
        user's translated handle in this protocol and updates the tag's location.
        A ``startIndex`` of 0, ie a mention at the very beginning of the
        content, is handled like any other position.

        Tags are processed in ``startIndex`` order. Indexed tags of other types
        have their ``startIndex`` shifted to account for earlier replacements.
        If a tag overlaps the previous one, its ``startIndex`` and ``length``
        are removed and it's otherwise skipped.

        Recurses into the inner ``object``(s) too.

        Called by :meth:`Protocol.translate_ids`.

        If ``obj.content`` is HTML, does nothing.

        Args:
          obj (dict): AS1 object

        Returns:
          dict: modified copy of the AS1 object, or None if ``obj`` is empty
        """
        if not obj:
            return None

        obj = copy.deepcopy(obj)
        obj['object'] = [cls.translate_mention_handles(o)
                         for o in as1.get_objects(obj)]
        if len(obj['object']) == 1:
            obj['object'] = obj['object'][0]

        content = obj.get('content')
        tags = obj.get('tags')
        # character indices only make sense for plain text content
        if (not content or not tags
                or obj.get('content_is_html')
                or bool(BeautifulSoup(content, 'html.parser').find())
                or HTML_ENTITY_RE.search(content)):
            return util.trim_nulls(obj)

        # startIndex 0 is valid, it's a tag at the very start of the content,
        # so check it against None instead of truthiness
        indexed = [tag for tag in tags
                   if tag.get('startIndex') is not None and tag.get('length')]

        # running difference in length between the new and original content
        offset = 0
        last_orig_end = 0
        for tag in sorted(indexed, key=lambda t: t['startIndex']):
            orig_start = tag['startIndex']
            if orig_start < last_orig_end:
                logger.warning(f'tags overlap! removing indices from {tag.get("url")}')
                del tag['startIndex']
                del tag['length']
                continue

            orig_end = orig_start + tag['length']
            last_orig_end = orig_end
            tag['startIndex'] += offset
            # mention tags without a url are skipped instead of raising KeyError
            if tag.get('objectType') == 'mention' and (user_id := tag.get('url')):
                if proto := Protocol.for_id(user_id):
                    user_id = ids.normalize_user_id(id=user_id, proto=proto)
                    if key := get_original_user_key(user_id):
                        user = key.get()
                    else:
                        user = proto.get_or_create(user_id, allow_opt_out=True)
                    if user:
                        start = tag['startIndex']
                        end = start + tag['length']
                        if handle := user.handle_as(cls):
                            content = content[:start] + handle + content[end:]
                            offset += len(handle) - tag['length']
                            tag.update({
                                'displayName': handle,
                                'length': len(handle),
                            })

        obj['tags'] = tags
        as2.set_content(obj, content)  # sets content *and* contentMap; obj is still AS1 here
        return util.trim_nulls(obj)
1✔
1158

1159
    @classmethod
1✔
1160
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
1✔
1161
        """Handles an incoming activity.
1162

1163
        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
1164
        unset, returns HTTP 299.
1165

1166
        Args:
1167
          obj (models.Object)
1168
          authed_as (str): authenticated actor id who sent this activity
1169
          internal (bool): whether to allow activity ids on internal domains,
1170
            from opted out/blocked users, etc.
1171
          received_at (datetime): when we first saw (received) this activity.
1172
            Right now only used for monitoring.
1173

1174
        Returns:
1175
          (str, int) tuple: (response body, HTTP status code) Flask response
1176

1177
        Raises:
1178
          werkzeug.HTTPException: if the request is invalid
1179
        """
1180
        # check some invariants
1181
        assert from_cls != Protocol
1✔
1182
        assert isinstance(obj, Object), obj
1✔
1183

1184
        if not obj.as1:
1✔
1185
            error('No object data provided')
1✔
1186

1187
        orig_obj = obj
1✔
1188
        id = None
1✔
1189
        if obj.key and obj.key.id():
1✔
1190
            id = obj.key.id()
1✔
1191

1192
        if not id:
1✔
1193
            id = obj.as1.get('id')
1✔
1194
            obj.key = ndb.Key(Object, id)
1✔
1195

1196
        if not id:
1✔
1197
            error('No id provided')
×
1198
        elif from_cls.owns_id(id) is False:
1✔
1199
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
1✔
1200
        elif from_cls.is_blocklisted(id, allow_internal=internal):
1✔
1201
            error(f'{id} is blocklisted')
1✔
1202

1203
        # does this protocol support this activity/object type?
1204
        from_cls.check_supported(obj, 'receive')
1✔
1205

1206
        # lease this object, atomically
1207
        memcache_key = activity_id_memcache_key(id)
1✔
1208
        leased = memcache.memcache.add(
1✔
1209
            memcache_key, 'leased', noreply=False,
1210
            expire=int(MEMCACHE_LEASE_EXPIRATION.total_seconds()))
1211

1212
        # short circuit if we've already seen this activity id
1213
        if ('force' not in request.values
1✔
1214
            and (not leased
1215
                 or (obj.new is False and obj.changed is False))):
1216
            error(f'Already seen', status=204)
1✔
1217

1218
        pruned = {k: v for k, v in obj.as1.items()
1✔
1219
                  if k not in ('contentMap', 'replies', 'signature')}
1220
        delay = ''
1✔
1221
        retry = request.headers.get('X-AppEngine-TaskRetryCount')
1✔
1222
        if (received_at and retry in (None, '0')
1✔
1223
                and obj.type not in ('delete', 'undo')):  # we delay deletes/undos
1224
            delay_s = int((util.now().replace(tzinfo=None)
1✔
1225
                           - received_at.replace(tzinfo=None)
1226
                           ).total_seconds())
1227
            delay = f'({delay_s} s behind)'
1✔
1228
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')
1✔
1229

1230
        # check authorization
1231
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
1232
        actor = as1.get_owner(obj.as1)
1✔
1233
        if not actor:
1✔
1234
            error('Activity missing actor or author')
1✔
1235

1236
        if not (from_user_cls := obj.owner_protocol()):
1✔
1237
            error(f"couldn't determine owner protocol for {obj.key.id()} source_protocol {obj.source_protocol}", status=204)
×
1238
        elif from_user_cls.owns_id(actor) is False:
1✔
1239
            error(f"{from_user_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)
1✔
1240

1241
        assert authed_as
1✔
1242
        assert isinstance(authed_as, str)
1✔
1243
        authed_as = ids.normalize_user_id(id=authed_as, proto=from_user_cls)
1✔
1244
        actor = ids.normalize_user_id(id=actor, proto=from_user_cls)
1✔
1245
        if actor != authed_as and not internal:
1✔
1246
            report_error("Auth: receive: authed_as doesn't match owner",
1✔
1247
                         user=f'{id} authed_as {authed_as} owner {actor}')
1248
            error(f"actor {actor} isn't authed user {authed_as}")
1✔
1249

1250
        # update copy ids to originals
1251
        obj.normalize_ids()
1✔
1252
        obj.resolve_ids()
1✔
1253

1254
        if (obj.type == 'follow'
1✔
1255
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
1256
            # follows of bot user; refresh user profile first
1257
            logger.info(f'Follow of bot user, reloading {actor}')
1✔
1258
            from_user = from_user_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
1259
            from_user.reload_profile()
1✔
1260
        else:
1261
            # load actor user
1262
            from_user = from_user_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
1263

1264
        if not internal and (not from_user or from_user.manual_opt_out):
1✔
1265
            error(f"Couldn't load actor {actor}", status=204)
×
1266

1267
        # apply protocol-specific filters
1268
        if 'force' not in request.values:
1✔
1269
            for filter in from_cls.RECEIVE_FILTERS:
1✔
1270
                if filter(obj, from_user):
1✔
1271
                    error(f'Activity {id} blocked by filter {filter.__name__}')
1✔
1272

1273
        # check if this is a profile object coming in via a user with use_instead
1274
        # set. if so, override the object's id to be the final user id (from_user's),
1275
        # after following use_instead.
1276
        if obj.type in as1.ACTOR_TYPES and from_user.key.id() != actor:
1✔
1277
            as1_id = obj.as1.get('id')
1✔
1278
            if ids.normalize_user_id(id=as1_id, proto=from_user) == actor:
1✔
1279
                logger.info(f'Overriding AS1 object id {as1_id} with Object id {from_user.profile_id()}')
1✔
1280
                obj.our_as1 = {**obj.as1, 'id': from_user.profile_id()}
1✔
1281

1282
        # if this is an object, ie not an activity, wrap it in a create or update
1283
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as,
1✔
1284
                                          from_user=from_user)
1285
        obj.add('users', from_user.key)
1✔
1286

1287
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1288
        inner_obj_id = inner_obj_as1.get('id')
1✔
1289
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
1✔
1290
            if not inner_obj_id:
1✔
1291
                error(f'{obj.type} object has no id!')
1✔
1292

1293
        # check age. we support backdated posts, but if they're over 2w old, we
1294
        # don't deliver them
1295
        if obj.type == 'post':
1✔
1296
            if published := inner_obj_as1.get('published'):
1✔
1297
                try:
1✔
1298
                    published_dt = util.parse_iso8601(published)
1✔
1299
                    if not published_dt.tzinfo:
1✔
1300
                        published_dt = published_dt.replace(tzinfo=timezone.utc)
×
1301
                    age = util.now() - published_dt
1✔
1302
                    if (age > CREATE_MAX_AGE
1✔
1303
                            and 'force' not in request.values
1304
                            and not util.domain_or_parent_in(
1305
                                from_user.key.id(), CREATE_MAX_AGE_EXEMPT_DOMAINS)):
1306
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
×
1307
                              status=204)
1308
                except ValueError:  # from parse_iso8601
×
1309
                    logger.debug(f"Couldn't parse published {published}")
×
1310

1311
        # write Object to datastore
1312
        if obj.type in STORE_AS1_TYPES:
1✔
1313
            obj.put()
1✔
1314

1315
        # store inner object
1316
        # TODO: unify with big obj.type conditional below. would have to merge
1317
        # this with the DM handling block lower down.
1318
        crud_obj = None
1✔
1319
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
1✔
1320
            # normalize_ids may have converted the inner object id to a user id
1321
            # (eg Web profile URL to domain), so normalize back to the profile
1322
            # object id to find the right existing Object in the datastore
1323
            crud_obj_id = (ids.normalize_object_id(id=inner_obj_id, proto=from_cls)
1✔
1324
                           or inner_obj_id)
1325
            crud_obj = Object.get_or_create(crud_obj_id, our_as1=inner_obj_as1,
1✔
1326
                                            source_protocol=obj.source_protocol,
1327
                                            authed_as=actor, users=[from_user.key],
1328
                                            deleted=False)
1329

1330
        actor = as1.get_object(obj.as1, 'actor')
1✔
1331
        actor_id = actor.get('id')
1✔
1332

1333
        # handle activity!
1334
        if obj.type == 'stop-following':
1✔
1335
            # TODO: unify with handle_follow?
1336
            # TODO: handle multiple followees
1337
            if not actor_id or not inner_obj_id:
1✔
1338
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
×
1339

1340
            # deactivate Follower
1341
            from_ = from_user_cls.key_for(actor_id)
1✔
1342
            if not (to_cls := Protocol.for_id(inner_obj_id)):
1✔
1343
                error(f"Can't determine protocol for {inner_obj_id} , giving up")
1✔
1344
            to = to_cls.key_for(inner_obj_id)
1✔
1345
            follower = Follower.query(Follower.to == to,
1✔
1346
                                      Follower.from_ == from_,
1347
                                      Follower.status == 'active').get()
1348
            if follower:
1✔
1349
                logger.info(f'Marking {follower} inactive')
1✔
1350
                follower.status = 'inactive'
1✔
1351
                follower.put()
1✔
1352
            else:
1353
                logger.warning(f'No Follower found for {from_} => {to}')
1✔
1354

1355
            # fall through to deliver to followee
1356
            # TODO: do we convert stop-following to webmention 410 of original
1357
            # follow?
1358

1359
            # fall through to deliver to followers
1360

1361
        elif obj.type in ('delete', 'undo'):
1✔
1362
            delete_obj_id = (from_user.profile_id()
1✔
1363
                            if inner_obj_id == from_user.key.id()
1364
                            else inner_obj_id)
1365

1366
            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
1✔
1367
            if not delete_obj:
1✔
1368
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
1✔
1369
                return 'OK', 204
1✔
1370

1371
            # TODO: just delete altogether!
1372
            logger.info(f'Marking Object {delete_obj_id} deleted')
1✔
1373
            delete_obj.deleted = True
1✔
1374
            delete_obj.put()
1✔
1375

1376
            # if this is an actor, handle deleting it later so that
1377
            # in case it's from_user, user.enabled_protocols is still populated
1378
            #
1379
            # fall through to deliver to followers and delete copy if necessary.
1380
            # should happen via protocol-specific copy target and send of
1381
            # delete activity.
1382
            # https://github.com/snarfed/bridgy-fed/issues/63
1383

1384
        elif obj.type == 'block':
1✔
1385
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
1✔
1386
                # blocking protocol bot user disables that protocol
1387
                from_user.delete(proto)
1✔
1388
                from_user.disable_protocol(proto)
1✔
1389
                return 'OK', 200
1✔
1390

1391
        elif obj.type == 'move':
1✔
1392
            from_cls.handle_move(obj, from_user=from_user)
1✔
1393
            # fall through to deliver the Move activity to remaining followers
1394

1395
        elif obj.type == 'post':
1✔
1396
            # handle DMs to bot users
1397
            if as1.is_dm(obj.as1):
1✔
1398
                return dms.receive(from_user=from_user, obj=obj)
1✔
1399

1400
        # fetch actor if necessary
1401
        is_user = from_user.is_profile(orig_obj)
1✔
1402
        if (actor and actor.keys() == set(['id'])
1✔
1403
                and not is_user and obj.type not in ('delete', 'undo')):
1404
            logger.debug('Fetching actor so we have name, profile photo, etc')
1✔
1405
            actor_obj = from_user_cls.load(
1✔
1406
                ids.profile_id(id=actor['id'], proto=from_cls), raise_=False)
1407
            if actor_obj and actor_obj.as1:
1✔
1408
                obj.our_as1 = {
1✔
1409
                    **obj.as1, 'actor': {
1410
                        **actor_obj.as1,
1411
                        # override profile id with actor id
1412
                        # https://github.com/snarfed/bridgy-fed/issues/1720
1413
                        'id': actor['id'],
1414
                    }
1415
                }
1416

1417
        # fetch object if necessary
1418
        if (obj.type in ('post', 'update', 'share')
1✔
1419
                and inner_obj_as1.keys() == set(['id'])
1420
                and from_cls.owns_id(inner_obj_id) is not False):
1421
            logger.debug('Fetching inner object')
1✔
1422
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
1✔
1423
                                      remote=(obj.type in ('post', 'update')))
1424
            if obj.type in ('post', 'update'):
1✔
1425
                crud_obj = inner_obj
1✔
1426
            if inner_obj and inner_obj.as1:
1✔
1427
                obj.our_as1 = {
1✔
1428
                    **obj.as1,
1429
                    'object': {
1430
                        **inner_obj_as1,
1431
                        **inner_obj.as1,
1432
                    }
1433
                }
1434
            elif obj.type in ('post', 'update'):
1✔
1435
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")
1✔
1436

1437
        if obj.type == 'follow':
1✔
1438
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
1✔
1439
                # follow of one of our protocol bot users; enable that protocol.
1440
                # fall through so that we send an accept.
1441
                try:
1✔
1442
                    from_user.enable_protocol(proto)
1✔
1443
                except ErrorButDoNotRetryTask:
1✔
1444
                    from web import Web
1✔
1445
                    bot = Web.get_by_id(proto.bot_user_id())
1✔
1446
                    from_cls.respond_to_follow('reject', follower=from_user,
1✔
1447
                                               followee=bot, follow=obj)
1448
                    raise
1✔
1449
                proto.bot_maybe_follow_back(from_user)
1✔
1450
                from_cls.handle_follow(obj, from_user=from_user)
1✔
1451
                return 'OK', 202
1✔
1452

1453
            from_cls.handle_follow(obj, from_user=from_user)
1✔
1454

1455
        # on update of the user's own actor/profile, set user.obj and store user back
1456
        # to datastore so that we recalculate computed properties like status etc
1457
        if is_user:
1✔
1458
            if obj.type == 'update' and crud_obj:
1✔
1459
                logger.info(f"update of the user's profile, re-storing user with obj_key {crud_obj.key.id()}")
1✔
1460
                from_user.obj = crud_obj
1✔
1461
                from_user.put()
1✔
1462

1463
        # deliver to targets
1464
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)
1✔
1465

1466
        # on user deleting themselves, deactivate their followers/followings.
1467
        # https://github.com/snarfed/bridgy-fed/issues/1304
1468
        #
1469
        # do this *after* delivering because delivery finds targets based on
1470
        # stored Followers
1471
        if is_user and obj.type == 'delete':
1✔
1472
            for proto in from_user.enabled_protocols:
1✔
1473
                from_user.disable_protocol(PROTOCOLS[proto])
1✔
1474

1475
            logger.info(f'Deactivating Followers from or to {from_user.key.id()}')
1✔
1476
            followers = Follower.query(
1✔
1477
                OR(Follower.to == from_user.key, Follower.from_ == from_user.key)
1478
            ).fetch()
1479
            for f in followers:
1✔
1480
                f.status = 'inactive'
1✔
1481
            ndb.put_multi(followers)
1✔
1482

1483
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
1✔
1484
        return resp
1✔
1485

1486
    @classmethod
    def handle_follow(from_cls, obj, from_user):
        """Handles an incoming follow activity.

        For each followee in the activity that lives on a different protocol,
        stores an active :class:`Follower`, adds the followee to ``obj``'s
        ``notify`` property, and sends an ``Accept`` back via
        :meth:`respond_to_follow`. Doesn't send the ``Follow`` itself; that
        happens in :meth:`deliver`.

        Calls ``error()`` if the activity has no objects, if an object has no
        id, if a followee's protocol can't be determined, or if the followee
        user can't be loaded or isn't enabled for ``from_cls``.

        Args:
          obj (models.Object): follow activity
          from_user (models.User): user (actor) the follow is from
        """
        logger.debug('Got follow. storing Follow(s), sending accept(s)')
        follower_id = from_user.key.id()

        # the activity's object(s) are the users being followed
        followees = as1.get_objects(obj.as1)
        if not followees:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        for followee_as1 in followees:
            followee_id = followee_as1.get('id')
            if not followee_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {follower_id} => {followee_id}')

            followee_cls = Protocol.for_id(followee_id)
            if not followee_cls:
                error(f"Couldn't determine protocol for {followee_id}")

            # same-protocol follows are native; we don't store or accept them
            if from_cls == followee_cls:
                logger.info(f'Skipping same-protocol Follower {follower_id} => {followee_id}')
                continue

            followee_key = followee_cls.key_for(followee_id)
            if not followee_key:
                logger.info(f'Skipping invalid {followee_cls.LABEL} user key: {followee_id}')
                continue

            followee = followee_cls.get_or_create(id=followee_key.id())
            if not (followee and followee.is_enabled(from_cls)):
                error(f'{followee_id} not found')

            # store the Follower
            Follower.get_or_create(to=followee, from_=from_user,
                                   follow=obj.key, status='active')

            # record that this followee now has a follower on an object-feed
            # protocol, if it didn't already
            if (from_cls.USES_OBJECT_FEED and
                    from_cls.LABEL not in followee.has_object_feed_followers_on):
                followee.has_object_feed_followers_on.append(from_cls.LABEL)
                followee.put()

            obj.add('notify', followee_key)
            from_cls.respond_to_follow('accept', follower=from_user,
                                       followee=followee, follow=obj)
1538

1539
    @classmethod
    def respond_to_follow(_, verb, follower, followee, follow):
        """Sends an accept or reject activity for a follow.

        Only acts if ``verb`` is in the follower's ``SUPPORTED_AS1_TYPES``;
        otherwise does nothing. The response is enqueued as a ``send`` task
        addressed to the follower's delivery target.

        Calls ``error()`` if the follower has no profile object or no delivery
        target can be found for it.

        Args:
          verb (str): ``accept`` or  ``reject``
          follower (models.User)
          followee (models.User)
          follow (models.Object)
        """
        assert verb in ('accept', 'reject')
        if verb not in follower.SUPPORTED_AS1_TYPES:
            return

        # the response goes to wherever the follower's profile receives deliveries
        profile = follower.obj
        target = follower.target_for(profile) if profile else None
        if not target:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # send. note that this is one response for the whole follow, even if it
        # has multiple followees!
        response_id = f'{followee.key.id()}/followers#{verb}-{follow.key.id()}'
        response_as1 = {
            'id': response_id,
            'objectType': 'activity',
            'verb': verb,
            'actor': followee.key.id(),
            'object': follow.as1,
        }
        common.create_task(
            queue='send',
            id=response_id,
            our_as1=response_as1,
            url=target,
            protocol=follower.LABEL,
            user=followee.key.urlsafe(),
        )
1571

1572
    @classmethod
    def bot_maybe_follow_back(bot_cls, user):
        """Follow a user from a protocol bot user, if their protocol needs that.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub). Does nothing if the user's class
        has ``BOTS_FOLLOW_BACK`` unset, or if the user has no profile object.
        Otherwise enqueues a ``send`` task with a follow activity from this
        protocol's bot user to ``user``.

        Args:
          user (User)
        """
        if not user.BOTS_FOLLOW_BACK:
            return

        # imported here rather than at module level
        from web import Web
        bot = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()

        bot_id = bot.key.id()
        user_id = user.key.id()
        logger.info(f'Following {user_id} back from bot user {bot_id}')

        profile = user.obj
        if not profile:
            logger.info("  can't follow, user has no profile obj")
            return

        target = user.target_for(profile)

        # the timestamp is part of the id, so each follow-back gets its own id
        follow_id = f'https://{bot_id}/#follow-back-{user_id}-{timestamp}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_id,
            'actor': bot_id,
            'object': user_id,
        }
        common.create_task(
            queue='send',
            id=follow_id,
            our_as1=follow_as1,
            url=target,
            source_protocol='web',
            protocol=user.LABEL,
            user=bot.key.urlsafe(),
        )
1607

1608
    @classmethod
    def handle_move(from_cls, obj, from_user):
        """Handles an incoming move (account migration) activity.

        Gets or creates the target user with the moving user's enabled
        protocols. If the moving user has any enabled protocols, transfers its
        bridged copies on those protocols to the target user and stores both
        users. Then repoints all of the moving account's active
        :class:`Follower`s at the target user, skipping any that would become
        same-protocol follows.

        Calls ``error()`` if the activity has no target, if its object isn't
        its actor, if the target's protocol can't be determined, or if the
        target user can't be created.

        Args:
          obj (models.Object): move activity
          from_user (models.User): user (actor) this activity/object is from
        """
        target_id = as1.get_id(obj.as1, 'target')
        if not target_id:
            error(f'Move activity requires target. Got: {obj.as1}')

        logger.info(f'Got move activity from {from_user.key.id()} to {target_id}')

        # check that object is the actor (the account being moved)
        actor_id = as1.get_id(obj.as1, 'actor')
        object_id = as1.get_id(obj.as1, 'object')
        if actor_id != object_id:
            error(f"Move activity object {object_id} isn't actor {actor_id}")

        # get the target protocol and user
        to_cls = Protocol.for_id(target_id)
        if not to_cls:
            error(f"Couldn't determine protocol for target {target_id}")

        to_user = to_cls.get_or_create(
            target_id,
            manual_opt_out=False,
            enabled_protocols=from_user.enabled_protocols,
        )
        if not to_user:
            error(f"Couldn't create {to_cls.LABEL} user {target_id}", status=299)

        if from_user.enabled_protocols:
            # from user may have bridged copy accounts; hand each one over to
            # the new user
            for label in from_user.enabled_protocols:
                proto = PROTOCOLS[label]
                copy_id = from_user.get_copy(proto)
                if not copy_id:
                    continue
                from_user.remove_copies_on(proto)
                to_user.add('copies', Target(uri=copy_id, protocol=label))

            from_user.put()
            to_user.put()

        # query for all active followers of the source account
        followers = Follower.query(
            Follower.to == from_user.key,
            Follower.status == 'active',
        ).fetch()

        # update each follower to point to the new account
        # but skip if it would create a same-protocol follower
        logger.info(f'Updating {len(followers)} followers from {actor_id} to {target_id}')
        to_key = to_user.key
        to_kind = to_key.kind()
        moved = []
        for follower in followers:
            if follower.from_.kind() == to_kind:
                logger.info(f'Skipping same-protocol follower {follower.from_.id()} => {to_key.id()}')
                continue

            follower.to = to_key
            moved.append(follower)

        if moved:
            ndb.put_multi(moved)
1✔
1670

1671
    @classmethod
    def handle_bare_object(cls, obj, *, authed_as, from_user):
        """If obj is a bare object, wraps it in a create or update activity.

        Bare objects are actors, notes, articles, and comments; anything else
        is returned as is. Actors and changed objects are wrapped in an update;
        new objects (or any bare object when the request has a ``force``
        value) are wrapped in a post. Otherwise calls ``error()`` with status
        204 since there's nothing to do.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity
            (not used in this method)
          from_user (models.User): user (actor) this activity/object is from
            (not used in this method)

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        obj_actor = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        now = util.now().isoformat()

        # this is a raw post; wrap it in a create or update activity
        changed = obj.changed
        if changed or is_actor:
            if changed:
                logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')
            else:
                logger.info('Got actor profile object, wrapping in update')

            update_id = f'{obj.key.id()}#bridgy-fed-update-{now}'
            # Mastodon requires the updated field for Updates, so add a default
            # value; the object's own updated value, if any, overrides it.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            updated_obj_as1 = {'updated': now, **obj.as1}
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': obj_actor,
                'object': updated_obj_as1,
            }
            logger.debug(f'  AS1: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        if obj.new or 'force' in request.values:
            post_id = f'{obj.key.id()}#bridgy-fed-create-{now}'
            post_as1 = {
                'objectType': 'activity',
                'verb': 'post',
                'id': post_id,
                'actor': obj_actor,
                'object': obj.as1,
                'published': now,
            }
            logger.info('Wrapping in post')
            logger.debug(f'  AS1: {json_dumps(post_as1, indent=2)}')
            return Object(id=post_id, our_as1=post_as1,
                          source_protocol=obj.source_protocol)

        error(f'{obj.key.id()} is unchanged, nothing to do', status=204)
×
1734

1735
    @classmethod
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Delivers an activity to its external recipients.

        Collects targets with :meth:`targets`, stores whichever object that
        call marked dirty, and enqueues one ``send`` task per target.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response; status 204 if there were no
          targets, otherwise 202
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # find delivery targets. maps Target to Object or None
        #
        # ...then write the relevant object, since targets() has a side effect of
        # setting the notify and feed properties (and dirty attribute)
        targets = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if to_proto:
            targets = {target: orig for target, orig in targets.items()
                       if target.protocol == to_proto.LABEL}

        if not targets:
            # don't raise via error() because we call deliver in code paths where
            # we want to continue after
            msg = r'No targets, nothing to do ¯\_(ツ)_/¯'
            logger.info(msg)
            return msg, 204

        # store object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif obj.type in STORE_AS1_TYPES and obj.dirty:
            obj.put()

        # stored activity types are passed to the task by id; others are
        # serialized into the task's parameters
        if obj.type in STORE_AS1_TYPES:
            obj_params = {'obj_id': obj.key.id()}
        else:
            obj_params = obj.to_request()

        # sort targets so order is deterministic for tests, debugging, etc
        ordered = sorted(targets.items(), key=lambda item: item[0].uri)

        # enqueue a send task for each target
        uris = ' '.join(target.uri for target, _ in ordered)
        logger.info(f'Delivering to {uris}')
        user = from_user.key.urlsafe()
        for target, orig_obj in ordered:
            orig_obj_id = orig_obj.key.id() if orig_obj else None
            common.create_task(
                queue='send',
                url=target.uri,
                protocol=target.protocol,
                orig_obj_id=orig_obj_id,
                user=user,
                **obj_params,
            )

        return 'OK', 202
1✔
1790

1791
    @classmethod
1✔
1792
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1793
        """Collects the targets to send a :class:`models.Object` to.
1794

1795
        Targets are both objects - original posts, events, etc - and actors.
1796

1797
        Args:
1798
          obj (models.Object)
1799
          from_user (User)
1800
          crud_obj (models.Object): if this is a create, update, or delete/undo
1801
            activity, the inner object that's being written, otherwise None.
1802
            (This object's ``notify`` and ``feed`` properties may be updated.)
1803
          internal (bool): whether this is a recursive internal call
1804

1805
        Returns:
1806
          dict: maps :class:`models.Target` to original (in response to)
1807
          :class:`models.Object`
1808
        """
1809
        logger.debug('Finding recipients and their targets')
1✔
1810

1811
        # we should only have crud_obj iff this is a create or update
1812
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1813
        write_obj = crud_obj or obj
1✔
1814
        write_obj.dirty = False
1✔
1815

1816
        target_uris = as1.targets(obj.as1)
1✔
1817
        orig_obj = None
1✔
1818
        targets = {}  # maps Target (with *normalized* uri) to Object or None
1✔
1819
        owner = as1.get_owner(obj.as1)
1✔
1820
        allow_opt_out = (obj.type == 'delete')
1✔
1821
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1822
        inner_obj_id = inner_obj_as1.get('id')
1✔
1823
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1824
        quoted_posts = as1.quoted_posts(inner_obj_as1)
1✔
1825
        mentioned_urls = as1.mentions(inner_obj_as1)
1✔
1826
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1827
        is_self_reply = False
1✔
1828

1829
        original_ids = []
1✔
1830
        if is_reply:
1✔
1831
            original_ids = in_reply_tos
1✔
1832
        elif inner_obj_id:
1✔
1833
            if inner_obj_id == from_user.key.id():
1✔
1834
                inner_obj_id = from_user.profile_id()
1✔
1835
            original_ids = [inner_obj_id]
1✔
1836

1837
        # maps id to Object
1838
        original_objs = {}
1✔
1839
        for id in original_ids:
1✔
1840
            if proto := Protocol.for_id(id):
1✔
1841
                original_objs[id] = proto.load(id, raise_=False)
1✔
1842

1843
        # for AP, add in-reply-tos' mentions
1844
        # https://github.com/snarfed/bridgy-fed/issues/1608
1845
        # https://github.com/snarfed/bridgy-fed/issues/1218
1846
        orig_post_mentions = {}  # maps mentioned id to original post Object
1✔
1847
        for id in in_reply_tos:
1✔
1848
            if ((in_reply_to_obj := original_objs.get(id))
1✔
1849
                    and (proto := PROTOCOLS.get(in_reply_to_obj.source_protocol))
1850
                    and proto.SEND_REPLIES_TO_ORIG_POSTS_MENTIONS
1851
                    and (mentions := as1.mentions(in_reply_to_obj.as1))):
1852
                logger.info(f"Adding in-reply-to {id} 's mentions to targets: {mentions}")
1✔
1853
                target_uris.extend(mentions)
1✔
1854
                for mention in mentions:
1✔
1855
                    orig_post_mentions[mention] = in_reply_to_obj
1✔
1856

1857
        target_uris = sorted(set(target_uris))
1✔
1858
        logger.info(f'Raw targets: {target_uris}')
1✔
1859

1860
        # which protocols should we allow delivering to?
1861
        to_protocols = []  # elements are Protocol subclasses
1✔
1862
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1863
                      + from_user.enabled_protocols):
1864
            if not (proto := PROTOCOLS.get(label)):
1✔
1865
                report_error(f'unknown enabled protocol {label} for {from_user.key.id()}')
1✔
1866
                continue
1✔
1867

1868
            if (obj.type == 'post' and (orig := original_objs.get(inner_obj_id))
1✔
1869
                    and orig.get_copy(proto)):
1870
                logger.info(f'Already created {id} on {label}, cowardly refusing to create there again')
1✔
1871
                continue
1✔
1872

1873
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1874
                                     or is_reply):
1875
                origs_could_bridge = None
1✔
1876

1877
                for id in original_ids:
1✔
1878
                    if not (orig := original_objs.get(id)):
1✔
1879
                        continue
1✔
1880
                    elif orig.get_copy(proto):
1✔
1881
                        logger.info(f'Allowing {label}, original {id} was bridged there')
1✔
1882
                        break
1✔
1883
                    elif from_user.is_profile(orig):
1✔
1884
                        logger.info(f"Allowing {label}, this is the user's profile")
1✔
1885
                        break
1✔
1886

1887
                    if (origs_could_bridge is not False
1✔
1888
                            and (orig_author_id := as1.get_owner(orig.as1))
1889
                            and (orig_proto := orig.owner_protocol())
1890
                            and (orig_author := orig_proto.get_by_id(orig_author_id))):
1891
                        origs_could_bridge = orig_author.is_enabled(proto)
1✔
1892

1893
                else:
1894
                    msg = f"original object(s) {original_ids} weren't bridged to {label}"
1✔
1895
                    last_retry = False
1✔
1896
                    if retries := request.headers.get(TASK_RETRIES_HEADER):
1✔
1897
                        if (last_retry := int(retries) >= TASK_RETRIES_RECEIVE):
1✔
1898
                            logger.info(f'last retry! skipping {proto.LABEL} and continuing')
1✔
1899

1900
                    if (proto.LABEL not in from_user.DEFAULT_ENABLED_PROTOCOLS
1✔
1901
                            and origs_could_bridge and not last_retry):
1902
                        # retry later; original obj may still be bridging
1903
                        # TODO: limit to brief window, eg no older than 2h? 1d?
1904
                        error(msg, status=304)
1✔
1905

1906
                    logger.info(msg)
1✔
1907
                    continue
1✔
1908

1909
            util.add(to_protocols, proto)
1✔
1910

1911
        logger.info(f'allowed protocols {[p.LABEL for p in to_protocols]}')
1✔
1912

1913
        # process direct targets
1914
        for target_id in target_uris:
1✔
1915
            target_proto = Protocol.for_id(target_id)
1✔
1916
            if not target_proto:
1✔
1917
                logger.info(f"Can't determine protocol for {target_id}")
1✔
1918
                continue
1✔
1919
            elif target_proto.is_blocklisted(target_id):
1✔
1920
                logger.debug(f'{target_id} is blocklisted')
1✔
1921
                continue
1✔
1922

1923
            target_is_actor = (target_id in mentioned_urls
1✔
1924
                               or obj.type in as1.VERBS_WITH_ACTOR_OBJECT)
1925

1926
            target_obj_id = (ids.profile_id(id=target_id, proto=target_proto)
1✔
1927
                             if target_is_actor
1928
                             # not ideal. this can sometimes be a non-user, eg
1929
                             # blocking a blocklist. ok right now since profile_id()
1930
                             # returns its input id unchanged if it doesn't look like
1931
                             # a user id, but that's brittle.
1932
                             else target_id)
1933
            orig_obj = target_proto.load(target_obj_id, raise_=False)
1✔
1934
            if not orig_obj or not orig_obj.as1:
1✔
1935
                logger.info(f"Couldn't load {target_obj_id}")
1✔
1936
                continue
1✔
1937

1938
            target_author_key = (target_proto(id=target_id).key if target_is_actor
1✔
1939
                                 else target_proto.actor_key(orig_obj))
1940

1941
            if not from_user.is_enabled(target_proto):
1✔
1942
                # if author isn't bridged and target user is, DM a prompt and
1943
                # add a notif for the target user
1944
                if (target_id in (in_reply_tos + quoted_posts + mentioned_urls)
1✔
1945
                        and target_author_key):
1946
                    if target_author := target_author_key.get():
1✔
1947
                        if target_author.is_enabled(from_cls):
1✔
1948
                            notifications.add_notification(target_author, write_obj)
1✔
1949
                            verb, noun = (
1✔
1950
                                ('replied to', 'replies') if target_id in in_reply_tos
1951
                                else ('quoted', 'quotes') if target_id in quoted_posts
1952
                                else ('mentioned', 'mentions'))
1953
                            dms.maybe_send(from_=target_proto, to_user=from_user,
1✔
1954
                                           type='replied_to_bridged_user', text=f"""\
1955
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently {verb}</a> {target_author.html_link()}, who's bridged here from {target_proto.PHRASE}. If you want them to see your {noun}, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1956

1957
                continue
1✔
1958

1959
            # deliver self-replies to followers
1960
            # https://github.com/snarfed/bridgy-fed/issues/639
1961
            if target_id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1962
                is_self_reply = True
1✔
1963
                logger.info(f'self reply!')
1✔
1964

1965
            # also add copies' targets
1966
            for copy in orig_obj.copies:
1✔
1967
                proto = PROTOCOLS[copy.protocol]
1✔
1968
                if proto in to_protocols:
1✔
1969
                    # copies generally won't have their own Objects
1970
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1971
                        target = util.normalize_url(target, trailing_slash=False)
1✔
1972
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {target_id}')
1✔
1973
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1974

1975
            if target_proto == from_cls:
1✔
1976
                logger.debug(f'Skipping same-protocol target {target_id}')
1✔
1977
                continue
1✔
1978

1979
            target = target_proto.target_for(orig_obj)
1✔
1980
            if not target:
1✔
1981
                # TODO: surface errors like this somehow?
1982
                logger.error(f"Can't find delivery target for {target_id}")
×
1983
                continue
×
1984

1985
            target = util.normalize_url(target, trailing_slash=False)
1✔
1986
            logger.debug(f'Target for {target_id} is {target} {target_author_key}')
1✔
1987

1988
            # only use orig_obj for inReplyTos, like/repost objects, reply's original
1989
            # post's mentions, etc
1990
            # https://github.com/snarfed/bridgy-fed/issues/1237
1991
            target_obj = None
1✔
1992
            if target_id in in_reply_tos + as1.get_ids(obj.as1, 'object'):
1✔
1993
                target_obj = orig_obj
1✔
1994
            elif target_id in orig_post_mentions:
1✔
1995
                target_obj = orig_post_mentions[target_id]
1✔
1996
            targets[Target(protocol=target_proto.LABEL, uri=target)] = target_obj
1✔
1997

1998
            if target_author_key:
1✔
1999
                logger.debug(f'Recipient is {target_author_key}')
1✔
2000
                if obj.type not in DONT_NOTIFY_TYPES:
1✔
2001
                    if write_obj.add('notify', target_author_key):
1✔
2002
                        write_obj.dirty = True
1✔
2003

2004
        if obj.type == 'undo':
1✔
2005
            logger.info('Object is an undo; adding targets for inner object')
1✔
2006
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
2007
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
2008
            else:
2009
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
2010
            if inner_obj:
1✔
2011
                for target, target_obj in from_cls.targets(
1✔
2012
                        inner_obj, from_user=from_user, internal=True).items():
2013
                    targets[target] = target_obj
1✔
2014
                    util.add(to_protocols, PROTOCOLS[target.protocol])
1✔
2015

2016
        if not to_protocols:
1✔
2017
            return {}
1✔
2018

2019
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
2020

2021
        # deliver to followers, if appropriate
2022
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
2023
        if not user_key:
1✔
2024
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
2025
            return targets
1✔
2026

2027
        # we deliver to HAS_COPIES protocols separately, below. we assume they have
2028
        # follower-independent targets.
2029
        to_followers_protos = [
1✔
2030
            p for p in to_protocols
2031
            if not (p.HAS_COPIES and p.DEFAULT_TARGET)
2032
            and not (p.USES_OBJECT_FEED and p.LABEL not in from_user.has_object_feed_followers_on)]
2033
        followers = []
1✔
2034
        is_undo_block = obj.type == 'undo' and inner_obj_as1.get('verb') == 'block'
1✔
2035
        if (obj.type in ('post', 'update', 'delete', 'move', 'share', 'undo')
1✔
2036
                and (not is_reply or is_self_reply) and not is_undo_block
2037
                and to_followers_protos):
2038
            logger.info(f'Delivering to followers of {user_key.id()} on {[p.LABEL for p in to_followers_protos]}')
1✔
2039
            # query each protocol individually
2040
            for proto in to_followers_protos:
1✔
2041
                kind = proto._get_kind()
1✔
2042
                for f in Follower.query(
1✔
2043
                        Follower.to == user_key,
2044
                        Follower.status == 'active',
2045
                        Follower.from_ >= ndb.Key(kind, '\x00'),
2046
                        Follower.from_ < ndb.Key(kind + '\x00', '\x00')):
2047
                    # skip protocol bot users
2048
                    if not Protocol.for_bridgy_subdomain(f.from_.id()):
1✔
2049
                        followers.append(f)
1✔
2050

2051
            logger.debug(f'  loaded {len(followers)} followers')
1✔
2052

2053
            user_keys = [f.from_ for f in followers]
1✔
2054
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
2055
            logger.debug(f'  loaded {len(users)} users')
1✔
2056

2057
            User.load_multi(users)
1✔
2058
            logger.debug(f'  loaded user objects')
1✔
2059

2060
            if (not followers and
1✔
2061
                (util.domain_or_parent_in(from_user.key.id(), LIMITED_DOMAINS)
2062
                 or util.domain_or_parent_in(obj.key.id(), LIMITED_DOMAINS))):
2063
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
2064
                return {}
1✔
2065

2066
            # add to followers' feeds, if any
2067
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
2068
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
2069
                    write_obj.feed = [u.key for u in users if u.USES_OBJECT_FEED]
1✔
2070
                    if write_obj.feed:
1✔
2071
                        write_obj.dirty = True
1✔
2072

2073
            # collect targets for followers
2074
            target_obj = (original_objs.get(inner_obj_id)
1✔
2075
                          if obj.type == 'share' else None)
2076
            for user in users:
1✔
2077
                if user.is_blocking(from_user):
1✔
2078
                    logger.debug(f'  {user.key.id()} blocks {from_user.key.id()}')
1✔
2079
                    continue
1✔
2080

2081
                # TODO: should we pass remote=False through here to Protocol.load?
2082
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
2083
                if not target:
1✔
2084
                    continue
1✔
2085

2086
                target = util.normalize_url(target, trailing_slash=False)
1✔
2087
                targets[Target(protocol=user.LABEL, uri=target)] = target_obj
1✔
2088

2089
            logger.debug(f'  collected {len(targets)} targets')
1✔
2090

2091
        # deliver to enabled HAS_COPIES protocols proactively
2092
        if obj.type in ('post', 'update', 'delete', 'share'):
1✔
2093
            for proto in to_protocols:
1✔
2094
                if proto.HAS_COPIES and proto.DEFAULT_TARGET:
1✔
2095
                    logger.info(f'user has {proto.LABEL} enabled, adding {proto.DEFAULT_TARGET}')
1✔
2096
                    targets.setdefault(
1✔
2097
                        Target(protocol=proto.LABEL, uri=proto.DEFAULT_TARGET), None)
2098

2099
        # maps string target URL to (Target, Object) tuple
2100
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
2101
        # maps Target to Object or None
2102
        targets = {}
1✔
2103
        source_domains = [
1✔
2104
            util.domain_from_link(url) for url in
2105
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
2106
            if util.is_web(url)
2107
        ]
2108
        for url in sorted(util.dedupe_urls(
1✔
2109
                candidates.keys(),
2110
                # preserve our PDS URL without trailing slash in path
2111
                # https://atproto.com/specs/did#did-documents
2112
                trailing_slash=False)):
2113
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
2114
                logger.info(f'Skipping same-domain target {url}')
×
2115
                continue
×
2116
            elif from_user.is_blocking(url):
1✔
2117
                logger.debug(f'{from_user.key.id()} blocks {url}')
1✔
2118
                continue
1✔
2119

2120
            target, obj = candidates[url]
1✔
2121
            targets[target] = obj
1✔
2122

2123
        return targets
1✔
2124

2125
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, raw=False, csv=False,
             **kwargs):
        """Returns an Object, read from the datastore and/or fetched remotely.

        When we can tell, ie ``local`` is True and ``remote`` is True or None,
        the returned object's :attr:`new` and :attr:`changed` attributes are
        populated.

        Args:
          id (str)
          remote (bool): controls network fetching. True always fetches, even
            if the object is already stored, and updates the stored copy.
            False never fetches; if the object isn't stored, returns None.
            None (the default) fetches only if the object isn't stored, or if
            the stored copy is older than ``OBJECT_REFRESH_AGE``.
          local (bool): whether to look in the datastore before fetching. If
            False, a successfully fetched object is still stored afterward.
          raise_ (bool): if False, any :class:`requests.RequestException`,
            :class:`HTTPException`, or websockets ``InvalidStatus`` raised by
            :meth:`fetch()` is caught and ``None`` is returned instead
          raw (bool): if True, ``id`` is used as is instead of being
            normalized to an on-protocol object id. Exact meaning varies by
            subclass.
          csv (bool): whether to specifically load a CSV object
            TODO: merge this into raw, using returned Content-Type?
          kwargs: passed through to :meth:`fetch()`

        Returns:
          models.Object: the loaded object, or None if it isn't fetchable, eg
          a non-URL string for Web, or ``remote`` is False and it isn't in the
          datastore. Also None if ``csv`` is set and the object isn't CSV, or
          if the fetched entity is larger than ``MAX_ENTITY_SIZE``.

        Raises:
          requests.HTTPError: anything that :meth:`fetch` raises, if ``raise_``
            is True
        """
        assert id
        assert local or remote is not False

        if not raw:
            id = ids.normalize_object_id(id=id, proto=cls)

        # step 1: datastore lookup
        obj = Object.get_by_id(id) if local else None
        if obj:
            if csv and not obj.is_csv:
                return None
            if obj.as1 or obj.csv or obj.raw or obj.deleted:
                obj.new = False

        # step 2: decide whether the stored copy is good enough
        if remote is False:
            return obj

        if remote is None and obj:
            is_stale = obj.updated < util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not is_stale:
                return obj
            # otherwise fall through and refresh over the network

        # step 3: prepare the entity that fetch() will populate
        prior_as1 = None
        if obj:
            # remember the old AS1 so that we can detect changes after fetching
            prior_as1 = obj.as1
            obj.our_as1 = None
            obj.new = False
        elif cls == Protocol:
            # NOTE(review): presumably the base class can't fetch on its own,
            # so there's nothing more to try here - confirm
            return None
        else:
            obj = Object(id=id)
            if local:
                # we looked in the datastore and it wasn't there
                obj.new = True
                obj.changed = False

        # step 4: fetch
        try:
            fetched = cls.fetch(obj, csv=csv, **kwargs)
        except (RequestException, HTTPException, InvalidStatus) as e:
            if raise_:
                raise
            util.interpret_http_exception(e)
            return None

        if not fetched or (csv and not obj.is_csv):
            return None

        # refuse to store entities that are over the size limit
        # https://stackoverflow.com/a/3042250/186123
        size = len(_entity_to_protobuf(obj)._pb.SerializeToString())
        if size > MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {MAX_ENTITY_SIZE}')
            return None

        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(prior_as1)

        # step 5: stamp the source protocol and store
        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
2231

2232
    @classmethod
    def check_supported(cls, obj, direction):
        """Bails out with HTTP 204 if this protocol can't handle this activity.

        Checks, in order: that the activity type (and, for CRUD verbs, the
        inner object's type) is in ``SUPPORTED_AS1_TYPES``; that posts aren't
        blank, ie they have content, an image, or an attachment stream; and,
        when receiving, that DMs are only to or from protocol bot accounts
        and that posts, updates, and actors are public.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)
          direction (str): ``'receive'`` or  ``'send'``

        Raises:
          werkzeug.HTTPException: if this protocol doesn't support this object
        """
        assert direction in ('receive', 'send')
        if not obj.type:
            return

        # activity type and, for CRUD verbs, inner object type
        inner = as1.get_object(obj.as1)
        inner_type = as1.object_type(inner) or ''
        supported = cls.SUPPORTED_AS1_TYPES
        outer_unsupported = obj.type not in supported
        inner_unsupported = (obj.type in as1.CRUD_VERBS
                             and inner_type
                             and inner_type not in supported)
        if outer_unsupported or inner_unsupported:
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # don't allow posts with blank content and no image/video/audio
        if obj.type in ('post', 'update'):
            crud_obj = as1.get_object(obj.as1)
        else:
            crud_obj = obj.as1

        is_blank_post = (
            crud_obj.get('objectType') in as1.POST_TYPES
            and not util.get_url(crud_obj, key='image')
            and not any(util.get_urls(crud_obj, 'attachments', inner_key='stream'))
            # TODO: handle articles with displayName but not content
            and not source.html_to_text(crud_obj.get('content')).strip())
        if is_blank_post:
            error('Blank content and no image or video or audio', status=204)

        # the remaining checks only apply to incoming activities
        if direction != 'receive':
            return

        recip = as1.recipient_if_dm(obj.as1)
        if recip:
            # receiving DMs is only allowed to protocol bot accounts
            owner = as1.get_owner(obj.as1)
            if (not cls.SUPPORTS_DMS or (recip not in common.bot_user_ids()
                                         and owner not in common.bot_user_ids())):
                # reply and say DMs aren't supported
                from_proto = obj.owner_protocol()
                to_proto = Protocol.for_id(recip)
                if owner and from_proto and to_proto:
                    from_user = from_proto.get_or_create(id=owner)
                    to_user = (to_proto.get_or_create(id=recip) if from_user
                               else None)
                    if from_user and to_user:
                        if obj.type == 'post':
                            in_reply_to = inner.get('id')
                        else:
                            in_reply_to = obj.as1.get('id')
                        text = f"Hi! Sorry, this account is bridged from {to_user.PHRASE}, so it doesn't support DMs. Try getting in touch another way!"
                        dm_type = f'dms_not_supported-{to_user.key.id()}'
                        dms.maybe_send(from_=to_user, to_user=from_user,
                                       text=text, type=dm_type,
                                       in_reply_to=in_reply_to)

                error("Bridgy Fed doesn't support DMs", status=204)

        # check that this activity is public. only do this for some activities,
        # not eg likes or follows, since Mastodon doesn't currently mark those
        # as explicitly public.
        elif (obj.type in {'post', 'update'} | as1.POST_TYPES | as1.ACTOR_TYPES
              and not util.domain_or_parent_in(crud_obj.get('id'), NON_PUBLIC_DOMAINS)
              and not as1.is_public(obj.as1, unlisted=False)):
            error('Bridgy Fed only supports public activities', status=204)
2300

2301
    @classmethod
    def block(cls, from_user, arg):
        """Blocks a user, list, or domain blocklist on behalf of ``from_user``.

        ``arg`` is first tried as a user handle or id, then as a list
        (``collection`` object), then handed to
        ``from_user.add_domain_blocklist``. For a user or list, a ``block``
        activity is stored and delivered.

        Args:
          from_user (models.User): user doing the blocking
          arg (str): handle or id of user/list to block

        Returns:
          models.User or models.Object: user or list that was blocked, or
          whatever ``from_user.add_domain_blocklist`` returned if it was truthy

        Raises:
          ValueError: if arg doesn't look like a user or list on this protocol,
            or if it's on ``from_user``'s own protocol
        """
        logger.info(f'user {from_user.key.id()} trying to block {arg}')

        def reject(msg):
            """Logs msg as a warning and raises it as a ValueError."""
            logger.warning(msg)
            raise ValueError(msg)

        # first, try interpreting as a user handle or id
        blockee = None
        try:
            blockee = load_user(arg, proto=cls, create=True, allow_opt_out=True)
        except (AssertionError, AttributeError, BadRequest, RuntimeError, ValueError) as e:
            logger.info(e)

        # same user class means same protocol, which has its own native blocks
        if type(from_user) == type(blockee):
            reject(f'{blockee.html_link()} is on {from_user.PHRASE}! Try blocking them there.')

        # may not be a user, see if it's a list
        if not blockee:
            if not cls or cls == Protocol:
                cls = Protocol.for_id(arg)

            blockee = cls.load(arg) if cls else None
            is_list = bool(blockee) and blockee.type == 'collection'
            if not is_list:
                # last resort: a domain blocklist
                blocklist = from_user.add_domain_blocklist(arg)
                if blocklist:
                    return blocklist
                reject(f"{arg} doesn't look like a user or list{' on ' + cls.PHRASE if cls else ''}, or we couldn't fetch it")
            elif blockee.source_protocol == from_user.LABEL:
                reject(f'{blockee.html_link()} is on {from_user.PHRASE}! Try blocking it there.')

        logger.info(f'  blocking {blockee.key.id()}')
        block_id = f'{from_user.key.id()}#bridgy-fed-block-{util.now().isoformat()}'
        block_as1 = {
            'objectType': 'activity',
            'verb': 'block',
            'id': block_id,
            'actor': from_user.key.id(),
            'object': blockee.key.id(),
        }
        activity = Object(id=block_id, source_protocol=from_user.LABEL,
                          our_as1=block_as1)
        activity.put()
        from_user.deliver(activity, from_user=from_user)

        return blockee
2357

2358
    @classmethod
    def unblock(cls, from_user, arg):
        """Unblocks a user or list.

        ``arg`` is first tried as a user handle or id, then as a list
        (``collection`` object), then handed to
        ``from_user.remove_domain_blocklist``. For a user or list, an ``undo``
        activity wrapping a ``block`` is stored and delivered.

        Args:
          from_user (models.User): user doing the unblocking
          arg (str): handle or id of user/list to unblock

        Returns:
          models.User or models.Object: user or list that was unblocked, or
          whatever ``from_user.remove_domain_blocklist`` returned if it was
          truthy

        Raises:
          ValueError: if arg doesn't look like a user or list on this protocol,
            or if it's on ``from_user``'s own protocol
        """
        logger.info(f'user {from_user.key.id()} trying to unblock {arg}')

        def fail(msg):
            """Logs msg as a warning and raises it as a ValueError."""
            logger.warning(msg)
            raise ValueError(msg)

        blockee = None
        try:
            # first, try interpreting as a user handle or id. pass proto by
            # keyword, same as block() does
            blockee = load_user(arg, proto=cls, create=True, allow_opt_out=True)
        except (AssertionError, AttributeError, BadRequest, RuntimeError, ValueError) as err:
            logger.info(err)

        # same user class means same protocol, which has its own native blocks
        if type(from_user) == type(blockee):
            fail(f'{blockee.html_link()} is on {from_user.PHRASE}! Try unblocking them there.')

        # may not be a user, see if it's a list
        if not blockee:
            if not cls or cls == Protocol:
                cls = Protocol.for_id(arg)

            if cls and (blockee := cls.load(arg)) and blockee.type == 'collection':
                if blockee.source_protocol == from_user.LABEL:
                    # this message used to say "blocking", copied from block()
                    fail(f'{blockee.html_link()} is on {from_user.PHRASE}! Try unblocking it there.')
            else:
                # last resort: a domain blocklist
                if blocklist := from_user.remove_domain_blocklist(arg):
                    return blocklist
                fail(f"{arg} doesn't look like a user or list{' on ' + cls.PHRASE if cls else ''}, or we couldn't fetch it")

        logger.info(f'  unblocking {blockee.key.id()}')
        undo_id = f'{from_user.key.id()}#bridgy-fed-unblock-{util.now().isoformat()}'
        obj = Object(id=undo_id, source_protocol=from_user.LABEL, our_as1={
            'objectType': 'activity',
            'verb': 'undo',
            'id': undo_id,
            'actor': from_user.key.id(),
            'object': {
                'objectType': 'activity',
                'verb': 'block',
                'actor': from_user.key.id(),
                'object': blockee.key.id(),
            },
        })
        obj.put()
        from_user.deliver(obj, from_user=from_user)

        return blockee
2418

2419

2420
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Builds the object from the request and hands it to the
    :meth:`Protocol.receive` of its source protocol.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    params = request.form.to_dict()

    # activities authed as one of our own domains are treated as internal
    authed_as = params.pop('authed_as', None)
    internal = authed_as == PRIMARY_DOMAIN or authed_as in PROTOCOL_DOMAINS

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    received_at = params.pop('received_at', None)
    if received_at:
        received_at = datetime.fromisoformat(received_at)

    try:
        from_proto = PROTOCOLS[obj.source_protocol]
        return from_proto.receive(obj=obj, authed_as=authed_as,
                                  internal=internal, received_at=received_at)
    except RequestException as err:
        util.interpret_http_exception(err)
        error(err, status=304)
    except (RuntimeError, ValueError) as err:
        logger.warning(err, exc_info=True)
        error(err, status=304)
2463

2464

2465
@cloud_tasks_only(log=None)
def send_task():
    """Task handler that sends one activity to one specific destination.

    Calls :meth:`Protocol.send` with the form parameters.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns:
      (str, int) tuple: empty body and HTTP status: 200 if the send returned a
      truthy value, 204 if it returned ``False`` (or we hit a memcache error
      and re-enqueued the task, or protocol/url were missing), 304 otherwise
    """
    common.log_request()

    # prepare
    form = request.form.to_dict()
    url = form.get('url')
    protocol = form.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    # NOTE(review): target isn't read below; it's kept because constructing it
    # may validate url/protocol - confirm before removing
    target = Target(uri=url, protocol=protocol)
    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    to_proto = PROTOCOLS[protocol]
    to_proto.check_supported(obj, 'send')
    allow_opt_out = (obj.type == 'delete')

    user = None
    user_key = form.get('user')
    if user_key:
        key = ndb.Key(urlsafe=user_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        user = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send. on a task's first attempt, log how long after the object was
    # created we're running
    delay = ''
    if request.headers.get('X-AppEngine-TaskRetryCount') == '0' and obj.created:
        delay_s = int((util.now().replace(tzinfo=None) - obj.created).total_seconds())
        delay = f'({delay_s} s behind)'
    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = to_proto.send(obj, url, from_user=user,
                             orig_obj_id=form.get('orig_obj_id'))
    except (MemcacheServerError, MemcacheUnexpectedCloseError,
            MemcacheUnknownError) as e:
        # our memorystore instance is probably undergoing maintenance. re-enqueue
        # task with a delay.
        # https://docs.cloud.google.com/memorystore/docs/memcached/about-maintenance
        report_error(f'memcache error on send task, re-enqueuing in {MEMCACHE_DOWN_TASK_DELAY}: {e}')
        common.create_task(queue='send', delay=MEMCACHE_DOWN_TASK_DELAY, **form)
        sent = False
    except BaseException as e:
        # only swallow exceptions that interpret_http_exception recognizes
        code, body = util.interpret_http_exception(e)
        if not code and not body:
            raise

    if sent is False:
        logger.info('Failed sending!')
        status = 204
    elif sent:
        status = 200
    else:
        status = 304

    return '', status
2535

2536

2537
@cloud_tasks_only(log=None)
def user_enabled_task():
    r"""Task handler for when a user enables a protocol.

    DMs any dormant :class:`models.Follower`\s pointing at the user to let them
    know the user is now bridged, so they can follow them for real, and flips
    those ``Follower``\s from ``dormant`` to ``inactive``.

    Only followers whose ``from_`` user is of the enabled protocol's kind are
    touched; dormant followers from other protocols are left as is.

    Parameters:
      user (url-safe google.cloud.ndb.key.Key): the :class:`models.User` who
        enabled bridging
      protocol (str): ``LABEL`` of the protocol they enabled

    Raises:
      ErrorButDoNotRetryTask: if the user has a non-empty ``status``
    """
    common.log_request()

    proto = PROTOCOLS[request.form['protocol']]
    user = ndb.Key(urlsafe=request.form['user']).get()
    assert user
    logger.info(f'{user.key.id()} is {user.status or "ok"}')
    if user.status:
        raise ErrorButDoNotRetryTask()

    # filter the followers themselves, not just their keys, so that followers
    # and from_users stay index-aligned for the zip below. (zipping the
    # unfiltered followers against filtered users paired the wrong ones.)
    kind = proto._get_kind()
    followers = [f for f in Follower.query(Follower.to == user.key,
                                           Follower.status == 'dormant').fetch()
                 if f.from_.kind() == kind]
    from_users = ndb.get_multi([f.from_ for f in followers])

    for follower, from_user in zip(followers, from_users):
        # skip followers whose user is missing or has a non-empty status
        if from_user and not from_user.status:
            logger.info(f'Updating and DMing Follower from {from_user.key.id()}')
            follower.status = 'inactive'
            follower.put()

            relationship = {
                'bounce': ', who you originally followed before you Bounced,',
                'requested': ', who you asked to bridge,',
            }.get(follower.reason, '')
            dms.maybe_send(from_=proto, to_user=from_user, text=f'<p>Hi! {user.html_link(proto=proto, proto_fallback=True)}{relationship} has bridged their account into {proto.PHRASE}. You can follow them now if you want.')

    return '', 200
2577

2578

2579
@cloud_tasks_only(log=None)
def migrate_out_task():
    """Task handler that finishes a migration out of Bridgy Fed.

    Currently, for migrating out to ATProto, uploads the user's blobs to the new PDS.
    Otherwise, does nothing.

    Parameters:
      user (str, url-safe ndb.Key of a User): the bridged :class:`models.User`
        migrating out
      protocol (str): destination protocol
      auth (optional url-safe ndb.Key of an oauth-dropins auth entity): the user's
        new account. For ATProto, an :class:`oauth_dropins.bluesky.BlueskyAuth`.

    Raises:
      ErrorButDoNotRetryTask: if the user entity doesn't exist
    """
    # NOTE(review): imported here instead of at module level, presumably to
    # avoid a circular import - confirm
    from atproto import ATProto

    common.log_request()

    form = request.form
    user = ndb.Key(urlsafe=form['user']).get()
    if not user:
        raise ErrorButDoNotRetryTask()

    # only ATProto has any extra work to do
    if form['protocol'] != ATProto.LABEL:
        return '', 200

    auth = ndb.Key(urlsafe=form['auth']).get()
    assert auth
    ATProto.migrate_out_blobs(user, auth)

    return '', 200
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc