• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 4d407a56-d783-4f38-b08a-6ac346225a30

22 May 2026 03:11PM UTC coverage: 94.17% (+0.02%) from 94.152%
4d407a56-d783-4f38-b08a-6ac346225a30

push

circleci

snarfed
add new migrate-out task queue, use it to copy ATProto blobs to new PDS

adds new ATProto.migrate_out_blobs classmethod. will also switch Bounce to use this.

for #1137

38 of 39 new or added lines in 5 files covered. (97.44%)

9 existing lines in 1 file now uncovered.

7543 of 8010 relevant lines covered (94.17%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.87
/protocol.py
1
"""Base protocol class and common code."""
2
from bs4 import BeautifulSoup
1✔
3
import copy
1✔
4
from datetime import datetime, timedelta, timezone
1✔
5
import logging
1✔
6
import os
1✔
7
import re
1✔
8
from threading import Lock
1✔
9
from urllib.parse import urljoin, urlparse
1✔
10

11
from cachetools import cached, LRUCache
1✔
12
from flask import request
1✔
13
from google.cloud import ndb
1✔
14
from google.cloud.ndb import OR
1✔
15
from google.cloud.ndb.model import _entity_to_protobuf
1✔
16
from granary import as1, as2, source
1✔
17
from granary.source import HTML_ENTITY_RE, html_to_text
1✔
18
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
19
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
20
from oauth_dropins.webutil.models import MAX_ENTITY_SIZE
1✔
21
from oauth_dropins.webutil import util
1✔
22
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
23
from pymemcache.exceptions import (
1✔
24
    MemcacheServerError,
25
    MemcacheUnexpectedCloseError,
26
    MemcacheUnknownError,
27
)
28
from requests import RequestException
1✔
29
from websockets.exceptions import InvalidStatus
1✔
30
import werkzeug.exceptions
1✔
31
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException
1✔
32

33
import common
1✔
34
from common import (
1✔
35
    ErrorButDoNotRetryTask,
36
    report_error,
37
)
38
from domains import (
1✔
39
    DOMAINS,
40
    LOCAL_DOMAINS,
41
    PRIMARY_DOMAIN,
42
    PROTOCOL_DOMAINS,
43
    SUPERDOMAIN,
44
)
45
import dms
1✔
46
from domains import DOMAIN_BLOCKLIST
1✔
47
import ids
1✔
48
import memcache
1✔
49
from models import (
1✔
50
    Follower,
51
    get_original_user_key,
52
    load_user,
53
    Object,
54
    PROTOCOLS,
55
    PROTOCOLS_BY_KIND,
56
    Target,
57
    User,
58
)
59
import notifications
1✔
60

61
OBJECT_REFRESH_AGE = timedelta(days=30)
1✔
62
DELETE_TASK_DELAY = timedelta(minutes=1)
1✔
63
CREATE_MAX_AGE = timedelta(weeks=2)
1✔
64
CREATE_MAX_AGE_EXEMPT_DOMAINS = (
1✔
65
    'alt.store',
66
)
67
# WARNING: keep this below the receive queue's min_backoff_seconds in queue.yaml!
68
MEMCACHE_LEASE_EXPIRATION = timedelta(seconds=25)
1✔
69
MEMCACHE_DOWN_TASK_DELAY = timedelta(minutes=5)
1✔
70
# WARNING: keep this in sync with queue.yaml's receive and webmention task_retry_limit!
71
TASK_RETRIES_RECEIVE = 4
1✔
72
# https://docs.cloud.google.com/tasks/docs/creating-appengine-handlers#reading-headers
73
TASK_RETRIES_HEADER = 'X-AppEngine-TaskRetryCount'
1✔
74

75
# require a follow for users on these domains before we deliver anything from
76
# them other than their profile
77
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
1✔
78
                   or util.load_file_lines('limited_domains'))
79

80
# domains to allow non-public activities from
81
NON_PUBLIC_DOMAINS = (
1✔
82
    # bridged from twitter (X). bird.makeup, kilogram.makeup, etc federate
83
    # tweets as followers-only, but they're public on twitter itself
84
    '.makeup',
85
)
86

87
DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | set((
1✔
88
    'accept',
89
    'reject',
90
    'stop-following',
91
    'undo',
92
))
93
STORE_AS1_TYPES = (as1.ACTOR_TYPES | as1.POST_TYPES | as1.VERBS_WITH_OBJECT
1✔
94
                   - DONT_STORE_AS1_TYPES)
95

96
DONT_NOTIFY_TYPES = (
1✔
97
    'block',
98
)
99

100
logger = logging.getLogger(__name__)
1✔
101

102

103
def error(*args, status=299, **kwargs):
1✔
104
    """Default HTTP status code to 299 to prevent retrying task."""
105
    return common.error(*args, status=status, **kwargs)
1✔
106

107

108
def activity_id_memcache_key(id):
1✔
109
    return memcache.key(f'receive-{id}')
1✔
110

111

112
class Protocol:
1✔
113
    """Base protocol class. Not to be instantiated; classmethods only."""
114
    ABBREV = None
1✔
115
    'str: lower case abbreviation, used in URL paths'
1✔
116
    PHRASE = None
1✔
117
    'str: human-readable name or phrase. Used in phrases like ``Follow this person on {PHRASE}``'
1✔
118
    OTHER_LABELS = ()
1✔
119
    'sequence of str: label aliases'
1✔
120
    LOGO_EMOJI = ''
1✔
121
    'str: logo emoji, if any'
1✔
122
    LOGO_HTML = ''
1✔
123
    'str: logo ``<img>`` tag, if any'
1✔
124
    CONTENT_TYPE = None
1✔
125
    "str: MIME type of this protocol's native data format, appropriate for the ``Content-Type`` HTTP header."
1✔
126
    HAS_COPIES = False
1✔
127
    'bool: whether this protocol is push and needs us to proactively create "copy" users and objects, as opposed to pulling converted objects on demand'
1✔
128
    DEFAULT_TARGET = None
1✔
129
    'str: optional, the default target URI to send this protocol\'s activities to. May be used as the "shared" target. Often only set if ``HAS_COPIES`` is true.'
1✔
130
    REQUIRES_AVATAR = False
1✔
131
    "bool: whether accounts on this protocol are required to have a profile picture. If they don't, their ``User.status`` will be ``blocked``."
1✔
132
    REQUIRES_NAME = False
1✔
133
    "bool: whether accounts on this protocol are required to have a profile name that's different than their handle or id. If they don't, their ``User.status`` will be ``blocked``."
1✔
134
    REQUIRES_OLD_ACCOUNT = False
1✔
135
    "bool: whether accounts on this protocol are required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their profile includes creation date and it's not old enough, their ``User.status`` will be ``blocked``."
1✔
136
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
137
    'sequence of str: labels of other protocols that are automatically enabled for this protocol to bridge into'
1✔
138
    DEFAULT_SERVE_USER_PAGES = False
1✔
139
    "bool: whether to serve user pages for all of this protocol's users on fed.brid.gy. If ``False``, user pages will only be served for users who have explicitly opted in."
1✔
140
    SUPPORTED_AS1_TYPES = ()
1✔
141
    'sequence of str: AS1 objectTypes and verbs that this protocol supports receiving and sending'
1✔
142
    SUPPORTS_DMS = False
1✔
143
    'bool: whether this protocol can receive DMs (chat messages)'
1✔
144
    USES_OBJECT_FEED = False
1✔
145
    'bool: whether to store followers on this protocol in :attr:`Object.feed`.'
1✔
146
    HTML_PROFILES = False
1✔
147
    'bool: whether this protocol supports HTML in profile descriptions. If False, profile descriptions should be plain text.'
1✔
148
    SEND_REPLIES_TO_ORIG_POSTS_MENTIONS = False
1✔
149
    "bool: whether replies to this protocol should include the original post's mentions as delivery targets"
1✔
150
    BOTS_FOLLOW_BACK = False
1✔
151
    'bool: when a user on this protocol follows a bot user to enable bridging, does the bot follow them back?'
1✔
152
    HANDLES_PER_PAY_LEVEL_DOMAIN = None
1✔
153
    'int: how many users to allow with handles on the same pay-level domain. None for no limit.'
1✔
154
    RECEIVE_FILTERS = ()
1✔
155
    'tuple of callable: filter functions from filters.py to apply to incoming activities. Applied in order, so put the cheapest filters first.'
1✔
156
    RATE_LIMIT_TYPE = memcache.RateLimitType.LINEAR
1✔
157
    'Whether receive and send task rate limiting increases linearly or exponentially.'
1✔
158

159
    @classmethod
1✔
160
    @property
1✔
161
    def LABEL(cls):
1✔
162
        """str: human-readable lower case name of this protocol, eg ``'activitypub'``"""
163
        return cls.__name__.lower()
1✔
164

165
    @staticmethod
1✔
166
    def for_request(fed=None):
1✔
167
        """Returns the protocol for the current request.
168

169
        ...based on the request's hostname.
170

171
        Args:
172
          fed (str or protocol.Protocol): protocol to return if the current
173
            request is on ``fed.brid.gy``
174

175
        Returns:
176
          Protocol: protocol, or None if the request's hostname is not a
177
          subdomain of ``brid.gy`` or isn't a known protocol
178
        """
179
        return Protocol.for_bridgy_subdomain(request.host, fed=fed)
1✔
180

181
    @staticmethod
1✔
182
    def for_bridgy_subdomain(domain_or_url, fed=None):
1✔
183
        """Returns the protocol for a brid.gy subdomain.
184

185
        Args:
186
          domain_or_url (str)
187
          fed (str or protocol.Protocol): protocol to return if the current
188
            request is on ``fed.brid.gy``
189

190
        Returns:
191
          class: :class:`Protocol` subclass, or None if the provided domain or request
192
          hostname domain is not a subdomain of ``brid.gy`` or isn't a known
193
          protocol
194
        """
195
        domain = (util.domain_from_link(domain_or_url, minimize=False)
1✔
196
                  if util.is_web(domain_or_url)
197
                  else domain_or_url)
198

199
        if domain == PRIMARY_DOMAIN or domain in LOCAL_DOMAINS:
1✔
200
            return PROTOCOLS[fed] if isinstance(fed, str) else fed
1✔
201
        elif domain and domain.endswith(SUPERDOMAIN):
1✔
202
            label = domain.removesuffix(SUPERDOMAIN)
1✔
203
            return PROTOCOLS.get(label)
1✔
204

205
    @classmethod
1✔
206
    def owns_id(cls, id):
1✔
207
        """Returns whether this protocol owns the id, or None if it's unclear.
208

209
        To be implemented by subclasses.
210

211
        IDs are string identities that uniquely identify users or objects, and
212
        are intended primarily to be machine readable and usable. Compare to
213
        handles, which are human-chosen, human-meaningful, and often but not
214
        always unique.
215

216
        Some protocols' ids are more or less deterministic based on the id
217
        format, eg AT Protocol owns ``at://`` URIs and DIDs. Others, like
218
        http(s) URLs, could be owned by eg Web or ActivityPub.
219

220
        This should be a quick guess without expensive side effects, eg no
221
        external HTTP fetches to fetch the id itself or otherwise perform
222
        discovery.
223

224
        Returns False if the id's domain is in :const:`domains.DOMAIN_BLOCKLIST`.
225

226
        Args:
227
          id (str): user id or object id
228

229
        Returns:
230
          bool or None:
231
        """
232
        return False
1✔
233

234
    @classmethod
1✔
235
    def owns_handle(cls, handle, allow_internal=False):
1✔
236
        """Returns whether this protocol owns the handle, or None if it's unclear.
237

238
        To be implemented by subclasses.
239

240
        Handles are string identities that are human-chosen, human-meaningful,
241
        and often but not always unique. Compare to IDs, which uniquely identify
242
        users, and are intended primarily to be machine readable and usable.
243

244
        Some protocols' handles are more or less deterministic based on the id
245
        format, eg ActivityPub (technically WebFinger) handles are
246
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
247
        ActivityPub, AT Protocol, or others.
248

249
        This should be a quick guess without expensive side effects, eg no
250
        external HTTP fetches to fetch the id itself or otherwise perform
251
        discovery.
252

253
        Args:
254
          handle (str)
255
          allow_internal (bool): whether to accept handles on internal domains
256
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc
257

258
        Returns:
259
          bool or None
260
        """
261
        return False
1✔
262

263
    @classmethod
1✔
264
    def handle_to_id(cls, handle):
1✔
265
        """Converts a handle to an id.
266

267
        To be implemented by subclasses.
268

269
        May incur network requests, eg DNS queries or HTTP requests. Avoids
270
        blocked or opted out users.
271

272
        Args:
273
          handle (str)
274

275
        Returns:
276
          str: corresponding id, or None if the handle can't be found
277
        """
278
        raise NotImplementedError()
1✔
279

280
    @classmethod
1✔
281
    def authed_user_for_request(cls):
1✔
282
        """Returns the authenticated user id for the current request.
283

284

285
        Checks authentication on the current request, eg HTTP Signature for
286
        ActivityPub. To be implemented by subclasses.
287

288
        Returns:
289
          str: authenticated user id, or None if there is no authentication
290

291
        Raises:
292
          RuntimeError: if the request's authentication (eg signature) is
293
          invalid or otherwise can't be verified
294
        """
295
        return None
1✔
296

297
    @classmethod
1✔
298
    def key_for(cls, id, allow_opt_out=False):
1✔
299
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.
300

301
        If called via `Protocol.key_for`, infers the appropriate protocol with
302
        :meth:`for_id`. If called with a concrete subclass, uses that subclass
303
        as is.
304

305
        Args:
306
          id (str):
307
          allow_opt_out (bool): whether to allow users who are currently opted out
308

309
        Returns:
310
          google.cloud.ndb.Key: matching key, or None if the given id is not a
311
          valid :class:`User` id for this protocol.
312
        """
313
        if cls == Protocol:
1✔
314
            proto = Protocol.for_id(id)
1✔
315
            return proto.key_for(id, allow_opt_out=allow_opt_out) if proto else None
1✔
316

317
        # load user so that we follow use_instead
318
        existing = cls.get_by_id(id, allow_opt_out=True)
1✔
319
        if existing:
1✔
320
            if existing.status and not allow_opt_out:
1✔
321
                return None
1✔
322
            return existing.key
1✔
323

324
        return cls(id=id).key
1✔
325

326
    @staticmethod
1✔
327
    def _for_id_memcache_key(id, remote=None):
1✔
328
        """Memcache key for :meth:`for_id`: the id itself, its domain, or None.
329

330
        Args:
331
          id (str)
332

333
        Returns:
334
          str: id if its domain is in ``PROTOCOL_DOMAINS``; its domain if ``remote`` is truthy and id is a web URL; otherwise None
335
        """
336
        domain = util.domain_from_link(id)
1✔
337
        if domain in PROTOCOL_DOMAINS:
1✔
338
            return id
1✔
339
        elif remote and util.is_web(id):
1✔
340
            return domain
1✔
341

342
    @cached(LRUCache(20000), lock=Lock())
1✔
343
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote=True: remote,
1✔
344
                      version=3)
345
    @staticmethod
1✔
346
    def for_id(id, remote=True):
1✔
347
        """Returns the protocol for a given id.
348

349
        Args:
350
          id (str)
351
          remote (bool): whether to perform expensive side effects like fetching
352
            the id itself over the network, or other discovery.
353

354
        Returns:
355
          Protocol subclass: matching protocol, or None if no single known
356
          protocol definitively owns this id
357
        """
358
        logger.debug(f'Determining protocol for id {id}')
1✔
359
        if not id:
1✔
360
            return None
1✔
361

362
        # remove our synthetic id fragment, if any
363
        #
364
        # will this eventually cause false positives for other services that
365
        # include our full ids inside their own ids, non-URL-encoded? guess
366
        # we'll figure that out if/when it happens.
367
        id = id.partition('#bridgy-fed-')[0]
1✔
368
        if not id:
1✔
369
            return None
1✔
370

371
        if util.is_web(id):
1✔
372
            # step 1: check for our per-protocol subdomains
373
            try:
1✔
374
                parsed = urlparse(id)
1✔
375
            except ValueError as e:
1✔
376
                logger.info(f'urlparse ValueError: {e}')
1✔
377
                return None
1✔
378

379
            is_internal = parsed.path.startswith(ids.INTERNAL_PATH_PREFIX)
1✔
380
            by_subdomain = Protocol.for_bridgy_subdomain(id)
1✔
381
            if by_subdomain and not (util.is_homepage(id) or is_internal
1✔
382
                                     or id in ids.BOT_ACTOR_AP_IDS):
383
                logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
1✔
384
                return by_subdomain
1✔
385

386
        # step 2: check if any Protocols say conclusively that they own it
387
        # sort to be deterministic
388
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
1✔
389
                           key=lambda p: p.LABEL)
390
        candidates = []
1✔
391
        for protocol in protocols:
1✔
392
            owns = protocol.owns_id(id)
1✔
393
            if owns:
1✔
394
                logger.debug(f'  {protocol.LABEL} owns id {id}')
1✔
395
                return protocol
1✔
396
            elif owns is not False:
1✔
397
                candidates.append(protocol)
1✔
398

399
        if len(candidates) == 1:
1✔
400
            logger.debug(f'  {candidates[0].LABEL} owns id {id}')
1✔
401
            return candidates[0]
1✔
402

403
        # step 3: look for existing Objects in the datastore
404
        #
405
        # note that we don't currently see if this is a copy id because I have FUD
406
        # over which Protocol for_id should return in that case...and also because a
407
        # protocol may already say definitively above that it owns the id, eg ATProto
408
        # with DIDs and at:// URIs.
409
        obj = Protocol.load(id, remote=False)
1✔
410
        if obj and obj.source_protocol:
1✔
411
            logger.debug(f'  {obj.key.id()} owned by source_protocol {obj.source_protocol}')
1✔
412
            return PROTOCOLS[obj.source_protocol]
1✔
413

414
        # step 4: fetch over the network, if necessary
415
        if not remote:
1✔
416
            return None
1✔
417

418
        for protocol in candidates:
1✔
419
            logger.debug(f'Trying {protocol.LABEL}')
1✔
420
            try:
1✔
421
                obj = protocol.load(id, local=False, remote=True)
1✔
422

423
                if protocol.ABBREV == 'web':
1✔
424
                    # for web, if we fetch and get HTML without microformats,
425
                    # load returns False but the object will be stored in the
426
                    # datastore with source_protocol web, and in cache. load it
427
                    # again manually to check for that.
428
                    obj = Object.get_by_id(id)
1✔
429
                    if obj and obj.source_protocol != 'web':
1✔
430
                        obj = None
×
431

432
                if obj:
1✔
433
                    logger.debug(f'  {protocol.LABEL} owns id {id}')
1✔
434
                    return protocol
1✔
435
            except BadGateway:
1✔
436
                # we tried and failed fetching the id over the network.
437
                # this depends on ActivityPub.fetch raising this!
438
                return None
1✔
439
            except HTTPException as e:
×
440
                # internal error we generated ourselves; try next protocol
441
                pass
×
442
            except Exception as e:
×
443
                code, _ = util.interpret_http_exception(e)
×
444
                if code:
×
445
                    # we tried and failed fetching the id over the network
446
                    return None
×
447
                raise
×
448

449
        logger.info(f'No matching protocol found for {id} !')
1✔
450
        return None
1✔
451

452
    @cached(LRUCache(20000), lock=Lock())
1✔
453
    @staticmethod
1✔
454
    def for_handle(handle):
1✔
455
        """Returns the protocol for a given handle.
456

457
        May incur expensive side effects like resolving the handle itself over
458
        the network or other discovery.
459

460
        Args:
461
          handle (str)
462

463
        Returns:
464
          (Protocol subclass, str) tuple: matching protocol and optional id (if
465
          resolved), or ``(None, None)`` if no known protocol owns this handle
466
        """
467
        # TODO: normalize, eg convert domains to lower case
468
        logger.debug(f'Determining protocol for handle {handle}')
1✔
469
        if not handle:
1✔
470
            return (None, None)
1✔
471

472
        # step 1: check if any Protocols say conclusively that they own it.
473
        # sort to be deterministic.
474
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
1✔
475
                           key=lambda p: p.LABEL)
476
        candidates = []
1✔
477
        for proto in protocols:
1✔
478
            owns = proto.owns_handle(handle)
1✔
479
            if owns:
1✔
480
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
1✔
481
                return (proto, None)
1✔
482
            elif owns is not False:
1✔
483
                candidates.append(proto)
1✔
484

485
        if len(candidates) == 1:
1✔
486
            logger.debug(f'  {candidates[0].LABEL} owns handle {handle}')
1✔
487
            return (candidates[0], None)
1✔
488

489
        # step 2: look for matching User in the datastore
490
        for proto in candidates:
1✔
491
            user = proto.query(proto.handle == handle).get()
1✔
492
            if user:
1✔
493
                if user.status:
1✔
494
                    return (None, None)
1✔
495
                logger.debug(f'  user {user.key} handle {handle}')
1✔
496
                return (proto, user.key.id())
1✔
497

498
        # step 3: resolve handle to id
499
        for proto in candidates:
1✔
500
            id = proto.handle_to_id(handle)
1✔
501
            if id:
1✔
502
                logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {id}')
1✔
503
                return (proto, id)
1✔
504

505
        logger.info(f'No matching protocol found for handle {handle} !')
1✔
506
        return (None, None)
1✔
507

508
    @classmethod
1✔
509
    def is_user_at_domain(cls, handle, allow_internal=False):
1✔
510
        """Returns True if handle is formatted ``user@domain.tld``, False otherwise.
511

512
        Example: ``user@instance.com`` (no leading ``@``)
513

514
        Args:
515
          handle (str)
516
          allow_internal (bool): whether the domain can be a Bridgy Fed domain
517
        """
518
        parts = handle.split('@')
1✔
519
        if len(parts) != 2:
1✔
520
            return False
1✔
521

522
        user, domain = parts
1✔
523
        return bool(user and domain
1✔
524
                    and not cls.is_blocklisted(domain, allow_internal=allow_internal))
525

526
    @classmethod
1✔
527
    def bridged_web_url_for(cls, user, fallback=False):
1✔
528
        """Returns the web URL for a user's bridged profile in this protocol.
529

530
        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
531
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``
532

533
        Args:
534
          user (models.User)
535
          fallback (bool): if True, and bridged users have no canonical user
536
            profile URL in this protocol, return the native protocol's profile URL
537

538
        Returns:
539
          str, or None if there isn't a canonical URL
540
        """
541
        if fallback:
1✔
542
            return user.web_url()
1✔
543

544
    @classmethod
1✔
545
    def actor_key(cls, obj, allow_opt_out=False):
1✔
546
        """Returns the :class:`User` key for a given object's author or actor.
547

548
        Args:
549
          obj (models.Object)
550
          allow_opt_out (bool): whether to return a user key if they're opted out
551

552
        Returns:
553
          google.cloud.ndb.key.Key or None:
554
        """
555
        owner = as1.get_owner(obj.as1)
1✔
556
        if owner:
1✔
557
            return cls.key_for(owner, allow_opt_out=allow_opt_out)
1✔
558

559
    @classmethod
1✔
560
    def bot_user_id(cls):
1✔
561
        """Returns the Web user id for the bot user for this protocol.
562

563
        For example, ``'bsky.brid.gy'`` for ATProto.
564

565
        Returns:
566
          str:
567
        """
568
        return f'{cls.ABBREV}{SUPERDOMAIN}'
1✔
569

570
    @classmethod
1✔
571
    def create_for(cls, user):
1✔
572
        """Creates or reactivates a copy user in this protocol.
573

574
        Should add the copy user to :attr:`copies`.
575

576
        If the copy user already exists and is active, should do nothing.
577

578
        Args:
579
          user (models.User): original source user. Shouldn't already have a
580
            copy user for this protocol in :attr:`copies`.
581

582
        Raises:
583
          ValueError: if we can't create a copy of the given user in this protocol
584
        """
585
        raise NotImplementedError()
×
586

587
    @classmethod
1✔
588
    def send(to_cls, obj, target, from_user=None, orig_obj_id=None):
1✔
589
        """Sends an outgoing activity.
590

591
        To be implemented by subclasses. Should call
592
        ``to_cls.translate_ids(obj.as1)`` before converting it to this Protocol's
593
        format.
594

595
        NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates a
596
        copy and sends it, it *must* add that copy to the *object*'s (not activity's)
597
        :attr:`copies`, and store it back in the datastore, *in a transaction*!
598

599
        Args:
600
          obj (models.Object): with activity to send
601
          target (str): destination URL to send to
602
          from_user (models.User): user (actor) this activity is from
603
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
604
            that this object refers to, eg replies to or reposts or likes
605

606
        Returns:
607
          bool: True if the activity is sent successfully, False if it is
608
          ignored or otherwise unsent due to protocol logic, eg no webmention
609
          endpoint, protocol doesn't support the activity type. (Failures are
610
          raised as exceptions.)
611

612
        Raises:
613
          werkzeug.exceptions.HTTPException: if the request fails
614
        """
615
        raise NotImplementedError()
×
616

617
    @classmethod
1✔
618
    def fetch(cls, obj, **kwargs):
1✔
619
        """Fetches a protocol-specific object and populates it in an :class:`Object`.
620

621
        Errors are raised as exceptions. If this method returns False, the fetch
622
        didn't fail but didn't succeed either, eg the id isn't valid for this
623
        protocol, or the fetch didn't return valid data for this protocol.
624

625
        To be implemented by subclasses.
626

627
        Args:
628
          obj (models.Object): with the id to fetch. Data is filled into one of
629
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
630
          kwargs: subclass-specific
631

632
        Returns:
633
          bool: True if the object was fetched and populated successfully,
634
          False otherwise
635

636
        Raises:
637
          requests.RequestException, werkzeug.exceptions.HTTPException,
638
          websockets.WebSocketException, etc: if the fetch fails
639
        """
640
        raise NotImplementedError()
×
641

642
    @classmethod
1✔
643
    def convert(cls, obj, from_user=None, **kwargs):
1✔
644
        """Converts an :class:`Object` to this protocol's data format.
645

646
        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
647
        and ``application/activity+json`` for :class:`ActivityPub`.
648

649
        Just passes through to :meth:`_convert`, then does minor
650
        protocol-independent postprocessing.
651

652
        Args:
653
          obj (models.Object):
654
          from_user (models.User): user (actor) this activity/object is from
655
          kwargs: protocol-specific, passed through to :meth:`_convert`
656

657
        Returns:
658
          converted object in the protocol's native format, often a dict
659
        """
660
        if not obj or not obj.as1:
1✔
661
            return {}
1✔
662

663
        id = obj.key.id() if obj.key else obj.as1.get('id')
1✔
664
        is_crud = obj.as1.get('verb') in as1.CRUD_VERBS
1✔
665
        base_obj = as1.get_object(obj.as1) if is_crud else obj.as1
1✔
666
        orig_our_as1 = obj.our_as1
1✔
667

668
        # post-processing for user profiles
669
        if (from_user and from_user.is_profile(obj)
1✔
670
                and PROTOCOLS.get(obj.source_protocol) != cls
671
                and Protocol.for_bridgy_subdomain(id) not in DOMAINS):
672
            # TODO: more systematic way to get this that covers all protocols,
673
            # eg Nostr NIP-05
674
            web_opted_in = (from_user.LABEL == 'web' and
1✔
675
                            (from_user.last_webmention_in
676
                             or from_user.has_redirects
677
                             or from_user.handle_as('atproto') == from_user.key.id()))
678
            if not web_opted_in:
1✔
679
                # mark bridged actors as bots and add "bridged by Bridgy Fed" to
680
                # their bios. (web users are special cased, they don't get the label
681
                # if they've explicitly enabled Bridgy Fed with redirects or
682
                # webmentions.)
683
                cls.add_source_links(obj=obj, from_user=from_user)
1✔
684

685
                # web is currently opt out, so add [Unofficial] to their display name
686
                # to be explicit that they may not have enabled this themselves
687
                if from_user.LABEL == 'web':
1✔
688
                    if obj.our_as1 is orig_our_as1:
1✔
689
                        obj.our_as1 = copy.deepcopy(obj.as1)
×
690
                    actor = as1.get_object(obj.our_as1) if is_crud else obj.our_as1
1✔
691
                    if ((name := actor.get('displayName'))
1✔
692
                            and not name.endswith(' [Unofficial]')):
693
                        actor['displayName'] = f'{name} [Unofficial]'
1✔
694

695
        converted = cls._convert(obj, from_user=from_user, **kwargs)
1✔
696
        obj.our_as1 = orig_our_as1
1✔
697
        return converted
1✔
698

699
    @classmethod
1✔
700
    def _convert(cls, obj, from_user=None, **kwargs):
1✔
701
        """Converts an :class:`Object` to this protocol's data format.
702

703
        To be implemented by subclasses. Implementations should generally call
704
        :meth:`Protocol.translate_ids` (as their own class) before converting to
705
        their format.
706

707
        Args:
708
          obj (models.Object):
709
          from_user (models.User): user (actor) this activity/object is from
710
          kwargs: protocol-specific
711

712
        Returns:
713
          converted object in the protocol's native format, often a dict. May
714
            return the ``{}`` empty dict if the object can't be converted.
715
        """
716
        raise NotImplementedError()
×
717

718
    @classmethod
1✔
719
    def add_source_links(cls, obj, from_user):
        """Appends a "🌉 bridged from ..." attribution to an actor's ``summary``.

        The attribution is HTML when ``cls.HTML_PROFILES`` is set, plain text
        otherwise. Replaces ``obj.our_as1`` with a deep copy of ``obj.as1`` and
        then edits the actor's ``objectType`` and ``summary`` in place. If the
        summary already contains ``🌉 bridged``, the summary is left alone.

        Args:
          cls (Protocol subclass): protocol that the user is bridging into
          obj (models.Object): user's actor/profile object, or a CRUD activity
            wrapping it
          from_user (models.User): user (actor) this activity/object is from
        """
        assert obj and obj.as1
        assert from_user

        obj.our_as1 = copy.deepcopy(obj.as1)
        if obj.type in as1.CRUD_VERBS:
            actor = as1.get_object(obj.as1)
        else:
            actor = obj.as1
        actor.setdefault('objectType', 'person')

        orig_summary = actor.setdefault('summary', '')
        summary_text = html_to_text(orig_summary, ignore_links=True)

        if '🌉 bridged' in summary_text:
            # source links were already added, don't add them twice
            return

        actor_id = actor.get('id')

        # prefer the actor's own URL, then fall back to the user's web URL (if
        # this actor is the user's profile) or the bare actor id
        url = as1.get_url(actor)
        if not url:
            if from_user.profile_id() == actor_id:
                url = from_user.web_url()
            else:
                url = actor_id

        from web import Web
        bot_user = Web.get_by_id(from_user.bot_user_id())

        if not cls.HTML_PROFILES:  # plain text
            # TODO: unify with the HTML branch below. which is right?
            obj_id = obj.key.id() if obj.key else obj.our_as1.get('id')
            is_user = from_user.key and obj_id in (from_user.key.id(),
                                                   from_user.profile_id())
            from_ = (from_user.web_url() if is_user else url) or '?'

            bridged = '🌉 bridged'
            if from_user.LABEL == 'web':
                # link web users to their user pages
                suffix = f': https://{PRIMARY_DOMAIN}{from_user.user_page_path()}'
            elif bot_user and from_user.LABEL not in cls.DEFAULT_ENABLED_PROTOCOLS:
                suffix = f', follow @{bot_user.handle_as(cls)} to interact'
            else:
                suffix = f' by https://{PRIMARY_DOMAIN}/'
            separator = '\n\n'
            # the plain text summary replaces the original, possibly HTML, one
            orig_summary = summary_text

        else:  # HTML
            if bot_user and from_user.LABEL not in cls.DEFAULT_ENABLED_PROTOCOLS:
                mention = bot_user.html_link(proto=cls, name=False, handle='short')
                suffix = f', follow {mention} to interact'
            else:
                suffix = f' by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>'

            separator = '<br><br>'

            is_user = from_user.key and actor_id in (from_user.key.id(),
                                                     from_user.profile_id())
            if is_user:
                bridged = f'🌉 <a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a>'
                from_ = f'<a href="{from_user.web_url()}">{from_user.handle}</a>'
            else:
                bridged = '🌉 bridged'
                from_ = util.pretty_link(url) if url else '?'

        logo = f'{from_user.LOGO_EMOJI} ' if from_user.LOGO_EMOJI else ''
        # only separate from the existing summary if there is one
        lead = separator if orig_summary else ''
        actor['summary'] = orig_summary + f'{lead}{bridged} from {logo}{from_}{suffix}'
1✔
792

793
    @classmethod
1✔
794
    def set_username(to_cls, user, username):
        """Sets a custom username for a user's bridged account in this protocol.

        To be implemented by subclasses; this base implementation always raises
        :class:`NotImplementedError`.

        Args:
          user (models.User)
          username (str)

        Raises:
          ValueError: if the username is invalid
          RuntimeError: if the username could not be set
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError()
1✔
806

807
    @classmethod
1✔
808
    def migrate_out(cls, user, to_user_id):
        """Migrates a bridged account out to be a native account.

        To be implemented by subclasses; this base implementation always raises
        :class:`NotImplementedError`.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: eg if this protocol doesn't own ``to_user_id``, or if
            ``user`` is on this protocol or not bridged to this protocol
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError()
×
820

821
    @classmethod
1✔
822
    def check_can_migrate_out(cls, user, to_user_id):
        """Checks whether a user can migrate out to a native account here.

        Fails if ``to_user_id`` isn't an id on this protocol, if ``user`` is
        itself on this protocol, or if ``user`` isn't currently bridged to this
        protocol. The checks run in that order and the first failure wins.

        Returns ``None`` if the user is ready to migrate.

        Subclasses may override this to add more criteria, but they should call
        this implementation first.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: if ``user`` isn't ready to migrate to this protocol yet.
            The message is also logged as a warning.
        """
        if cls.owns_id(to_user_id) is False:
            problem = f"{to_user_id} doesn't look like an {cls.LABEL} id"
        elif isinstance(user, cls):
            problem = f"{user.handle_or_id()} is on {cls.PHRASE}"
        elif not user.is_enabled(cls):
            problem = f"{user.handle_or_id()} isn't currently bridged to {cls.PHRASE}"
        else:
            # all checks passed
            return

        logger.warning(problem)
        raise ValueError(problem)
1✔
850

851
    @classmethod
1✔
852
    def migrate_in(cls, user, from_user_id, **kwargs):
        """Migrates a native account in to be a bridged account.

        The protocol independent parts are done here; protocol-specific parts are
        done in :meth:`_migrate_in`, which this wraps.

        Tries to reload the user's profile before calling :meth:`_migrate_in`.
        Reloading is best effort: if it fails with an HTTP or network error, the
        failure is logged and the migration continues.

        After :meth:`_migrate_in` succeeds, adds this protocol to the user's
        ``enabled_protocols`` and stores the user. If the user has a profile
        object, a ``receive`` task is created for it; when ``cls.HAS_COPIES`` is
        set, the profile's copy on this protocol is first replaced with
        ``from_user_id``'s profile id.

        Args:
          user (models.User): native user on another protocol to attach the
            newly imported bridged account to
          from_user_id (str)
          kwargs: additional protocol-specific parameters

        Raises:
          ValueError: eg if this protocol doesn't own ``from_user_id``, or if
            ``user`` is on this protocol or already bridged to this protocol
        """
        def _error(msg):
            logger.warning(msg)
            raise ValueError(msg)

        logger.info(f"Migrating in {from_user_id} for {user.key.id()}")

        # check req'ts
        if cls.owns_id(from_user_id) is False:
            _error(f"{from_user_id} doesn't look like an {cls.LABEL} id")
        elif isinstance(user, cls):
            _error(f"{user.handle_or_id()} is on {cls.PHRASE}")
        elif cls.HAS_COPIES and cls.LABEL in user.enabled_protocols:
            _error(f"{user.handle_or_id()} is already bridged to {cls.PHRASE}")

        # reload profile. best effort: don't abort the migration if this fails,
        # but don't swallow the failure silently either
        try:
            user.reload_profile()
        except (RequestException, HTTPException) as e:
            _, msg = util.interpret_http_exception(e)
            logger.warning(f"Couldn't reload profile for {user.key.id()}, continuing: {msg or e}")

        # migrate!
        cls._migrate_in(user, from_user_id, **kwargs)
        user.add('enabled_protocols', cls.LABEL)
        user.put()

        # attach profile object
        if user.obj:
            if cls.HAS_COPIES:
                # point the profile's copy on this protocol at the migrated
                # account's profile
                profile_id = ids.profile_id(id=from_user_id, proto=cls)
                user.obj.remove_copies_on(cls)
                user.obj.add('copies', Target(uri=profile_id, protocol=cls.LABEL))
                user.obj.put()

            common.create_task(queue='receive', obj_id=user.obj_key.id(),
                               authed_as=user.key.id())
905

906
    @classmethod
1✔
907
    def _migrate_in(cls, user, from_user_id, **kwargs):
        """Protocol-specific parts of migrating in external account.

        Called by :meth:`migrate_in`, which does most of the work, including calling
        :meth:`reload_profile` before this.

        To be implemented by subclasses; this base implementation always raises
        :class:`NotImplementedError`.

        Args:
          user (models.User): native user on another protocol to attach the
            newly imported account to. Unused.
          from_user_id (str): id of the account to be migrated in, eg a DID
          kwargs: protocol dependent

        Raises:
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError()
×
920

921
    @classmethod
1✔
922
    def target_for(cls, obj, shared=False):
        """Returns an :class:`Object`'s delivery target (endpoint).

        To be implemented by subclasses.

        Examples:

        * If obj has ``source_protocol`` ``web``, returns its URL, as a
          webmention target.
        * If obj is an ``activitypub`` actor, returns its inbox.
        * If obj is an ``activitypub`` object, returns its author's or actor's
          inbox.

        Args:
          obj (models.Object):
          shared (bool): optional. If True, returns a common/shared
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
            multiple recipients for efficiency

        Returns:
          str: target endpoint, or None if not available.

        Raises:
          NotImplementedError: in this base implementation
        """
        raise NotImplementedError()
×
945

946
    @classmethod
1✔
947
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        Default implementation here, subclasses may override.

        The effective blocklist is ``DOMAIN_BLOCKLIST``, plus reserved and local
        TLDs when not running in ``DEBUG`` mode, plus our own ``DOMAINS`` unless
        ``allow_internal`` is set.

        Args:
          url (str):
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          result of :func:`util.domain_or_parent_in` for ``url`` against the
            effective blocklist
        """
        # copy into a local tuple first so that the += below can never extend
        # the module-level constant in place, even if it's a mutable sequence
        blocklist = tuple(DOMAIN_BLOCKLIST)
        if not DEBUG:
            blocklist += tuple(util.RESERVED_TLDS | util.LOCAL_TLDS)
        if not allow_internal:
            blocklist += tuple(DOMAINS)
        return util.domain_or_parent_in(url, blocklist)
1✔
963

964
    @classmethod
1✔
965
    def translate_ids(to_cls, obj):
        """Translates all ids in an AS1 object to a specific protocol.

        Infers source protocol for each id value separately.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``bcc``
        * ``bto``
        * ``cc``
        * ``featured[].items``, ``featured[].orderedItems``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``object.object``
        * ``attachments[].id``
        * ``tags[objectType=mention].url``
        * ``to``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        The input is deep copied first, so ``obj`` itself is not modified.

        Args:
          to_cls (Protocol subclass): protocol to translate ids to. Must not be
            :class:`Protocol` itself.
          obj (dict): AS1 object or activity (not :class:`models.Object`!)

        Returns:
          dict: translated AS1 version of ``obj``. If ``obj`` is empty or
            ``None``, it's returned as is.
        """
        from ui import UIProtocol

        assert to_cls != Protocol
        if not obj:
            return obj

        # work on a copy, and normalize the inner object(s) to a list of dicts
        # for the duration of this method. collapsed back down at the end.
        outer_obj = to_cls.translate_mention_handles(copy.deepcopy(obj))
        inner_objs = outer_obj['object'] = as1.get_objects(outer_obj)

        def translate(elem, field, fn, uri=False):
            """Translates the id(s) in ``elem[field]``, in place.

            Args:
              elem (dict): AS1 object that contains ``field``
              field (str): name of the field in ``elem`` to translate
              fn (callable): id translation function, called with ``id``,
                ``from_``, and ``to`` kwargs
              uri (bool): whether to convert the resulting id with
                ``to_cls(id=...).id_uri()``
            """
            owner_id = as1.get_owner(elem)
            owner_proto = Protocol.for_id(owner_id)

            elem[field] = as1.get_objects(elem, field)
            for obj in elem[field]:
                if id := obj.get('id'):
                    # leave audience targets in recipient fields untouched
                    if field in ('to', 'cc', 'bcc', 'bto') and as1.is_audience(id):
                        continue

                    from_cls = Protocol.for_id(id)
                    # for ids that look like UIProtocol's, use the owner's
                    # protocol as the source protocol instead, if we know it
                    if field == 'id' and from_cls == UIProtocol and owner_proto:
                        logger.info(f'owner of {id} {owner_id} is {owner_proto.LABEL}, translating id from that protocol')
                        from_cls = owner_proto

                    # TODO: what if from_cls is None? relax translate_object_id,
                    # make it a noop if we don't know enough about from/to?
                    if from_cls and from_cls != to_cls:
                        obj['id'] = fn(id=id, from_=from_cls, to=to_cls)
                    if uri:
                        obj['id'] = to_cls(id=obj['id']).id_uri() if obj['id'] else id

            # collapse objects that only have an id back down to bare id strings
            elem[field] = [o['id'] if o.keys() == {'id'} else o
                           for o in elem[field]]

            # ...and single-element lists to the element itself, except for
            # collection item fields, which stay lists
            if len(elem[field]) == 1 and field not in ('items', 'orderedItems'):
                elem[field] = elem[field][0]

        # outer object's own id: user id if it's an actor, object id otherwise
        type = as1.object_type(outer_obj)
        translate(outer_obj, 'id',
                  ids.translate_user_id if type in as1.ACTOR_TYPES
                  else ids.translate_object_id)

        for o in inner_objs:
            # an inner object's id is a user id if it's an actor, if it's the
            # outer object's owner, or if the outer activity is a follow,
            # stop-following, or block
            is_actor = (as1.object_type(o) in as1.ACTOR_TYPES
                        or as1.get_owner(outer_obj) == o.get('id')
                        or type in ('follow', 'stop-following', 'block'))
            translate(o, 'id', (ids.translate_user_id if is_actor
                                else ids.translate_object_id))
            # TODO: need to handle both user and object ids here
            # https://github.com/snarfed/bridgy-fed/issues/2281
            obj_is_actor = o.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT
            translate(o, 'object', (ids.translate_user_id if obj_is_actor
                                    else ids.translate_object_id))

        # fields that are handled the same way in the outer and inner objects
        for o in [outer_obj] + inner_objs:
            translate(o, 'inReplyTo', ids.translate_object_id)
            for field in 'actor', 'author', 'to', 'cc', 'bto', 'bcc':
                translate(o, field, ids.translate_user_id)
            for tag in as1.get_objects(o, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', ids.translate_user_id, uri=True)
            for att in as1.get_objects(o, 'attachments'):
                translate(att, 'id', ids.translate_object_id)
                # attachments with a url but no id get an id translated from
                # the url, if we can determine the url's protocol
                url = att.get('url')
                if url and not att.get('id'):
                    if from_cls := Protocol.for_id(url):
                        att['id'] = ids.translate_object_id(from_=from_cls, to=to_cls,
                                                            id=url)
            if feat := as1.get_object(o, 'featured'):
                translate(feat, 'orderedItems', ids.translate_object_id)
                translate(feat, 'items', ids.translate_object_id)

        outer_obj = util.trim_nulls(outer_obj)

        # undo the inner object normalization from the beginning: collapse
        # id-only objects to strings and a single-element list to its element
        if objs := util.get_list(outer_obj ,'object'):
            outer_obj['object'] = [o['id'] if o.keys() == {'id'} else o for o in objs]
            if len(outer_obj['object']) == 1:
                outer_obj['object'] = outer_obj['object'][0]

        return outer_obj
1✔
1086

1087
    @classmethod
1✔
1088
    def translate_mention_handles(cls, obj):
        """Translates @-mentions in ``obj.content`` to this protocol's handles.

        Specifically, for each ``mention`` tag in the object's tags that has
        ``startIndex`` and ``length``, replaces it in ``obj.content`` with that
        user's translated handle in this protocol and updates the tag's location.
        A ``startIndex`` of 0, ie a mention at the very beginning of the content,
        counts as having ``startIndex``.

        Indexed tags of other types aren't rewritten, but their ``startIndex`` is
        shifted to account for preceding replacements. Tags that overlap an
        earlier tag have their ``startIndex`` and ``length`` removed.

        Recurses into the inner ``object``(s) too.

        Called by :meth:`Protocol.translate_ids`.

        If ``obj.content`` is HTML, does nothing.

        Args:
          obj (dict): AS1 object

        Returns:
          dict: modified copy of the AS1 object, or ``None`` if ``obj`` is empty
        """
        if not obj:
            return None

        obj = copy.deepcopy(obj)
        obj['object'] = [cls.translate_mention_handles(o)
                         for o in as1.get_objects(obj)]
        if len(obj['object']) == 1:
            obj['object'] = obj['object'][0]

        content = obj.get('content')
        tags = obj.get('tags')
        # indices are only meaningful in plain text content, so bail out on
        # anything that looks like HTML
        if (not content or not tags
                or obj.get('content_is_html')
                or bool(BeautifulSoup(content, 'html.parser').find())
                or HTML_ENTITY_RE.search(content)):
            return util.trim_nulls(obj)

        # startIndex 0 is valid (a tag at the very beginning of the content),
        # so check it against None instead of truthiness
        indexed = [tag for tag in tags
                   if tag.get('startIndex') is not None and tag.get('length')]

        # running difference in length between the new and original content
        offset = 0
        last_orig_end = 0
        for tag in sorted(indexed, key=lambda t: t['startIndex']):
            orig_start = tag['startIndex']
            if orig_start < last_orig_end:
                logger.warning(f'tags overlap! removing indices from {tag.get("url")}')
                del tag['startIndex']
                del tag['length']
                continue

            orig_end = orig_start + tag['length']
            last_orig_end = orig_end
            tag['startIndex'] += offset
            # mention tags without a url can't be resolved to a user; skip them
            # instead of raising KeyError
            if tag.get('objectType') == 'mention' and (mention_id := tag.get('url')):
                if proto := Protocol.for_id(mention_id):
                    mention_id = ids.normalize_user_id(id=mention_id, proto=proto)
                    if key := get_original_user_key(mention_id):
                        user = key.get()
                    else:
                        user = proto.get_or_create(mention_id, allow_opt_out=True)
                    if user:
                        start = tag['startIndex']
                        end = start + tag['length']
                        if handle := user.handle_as(cls):
                            content = content[:start] + handle + content[end:]
                            offset += len(handle) - tag['length']
                            tag.update({
                                'displayName': handle,
                                'length': len(handle),
                            })

        obj['tags'] = tags
        as2.set_content(obj, content)  # sets content *and* contentMap; obj is still AS1 here
        return util.trim_nulls(obj)
1✔
1158

1159
    @classmethod
1✔
1160
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
1✔
1161
        """Handles an incoming activity.
1162

1163
        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
1164
        unset, returns HTTP 299.
1165

1166
        Args:
1167
          obj (models.Object)
1168
          authed_as (str): authenticated actor id who sent this activity
1169
          internal (bool): whether to allow activity ids on internal domains,
1170
            from opted out/blocked users, etc.
1171
          received_at (datetime): when we first saw (received) this activity.
1172
            Right now only used for monitoring.
1173

1174
        Returns:
1175
          (str, int) tuple: (response body, HTTP status code) Flask response
1176

1177
        Raises:
1178
          werkzeug.HTTPException: if the request is invalid
1179
        """
1180
        # check some invariants
1181
        assert from_cls != Protocol
1✔
1182
        assert isinstance(obj, Object), obj
1✔
1183

1184
        if not obj.as1:
1✔
1185
            error('No object data provided')
1✔
1186

1187
        orig_obj = obj
1✔
1188
        id = None
1✔
1189
        if obj.key and obj.key.id():
1✔
1190
            id = obj.key.id()
1✔
1191

1192
        if not id:
1✔
1193
            id = obj.as1.get('id')
1✔
1194
            obj.key = ndb.Key(Object, id)
1✔
1195

1196
        if not id:
1✔
1197
            error('No id provided')
×
1198
        elif from_cls.owns_id(id) is False:
1✔
1199
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
1✔
1200
        elif from_cls.is_blocklisted(id, allow_internal=internal):
1✔
1201
            error(f'{id} is blocklisted')
1✔
1202

1203
        # does this protocol support this activity/object type?
1204
        from_cls.check_supported(obj, 'receive')
1✔
1205

1206
        # lease this object, atomically
1207
        memcache_key = activity_id_memcache_key(id)
1✔
1208
        leased = memcache.memcache.add(
1✔
1209
            memcache_key, 'leased', noreply=False,
1210
            expire=int(MEMCACHE_LEASE_EXPIRATION.total_seconds()))
1211

1212
        # short circuit if we've already seen this activity id
1213
        if ('force' not in request.values
1✔
1214
            and (not leased
1215
                 or (obj.new is False and obj.changed is False))):
1216
            error(f'Already seen', status=204)
1✔
1217

1218
        pruned = {k: v for k, v in obj.as1.items()
1✔
1219
                  if k not in ('contentMap', 'replies', 'signature')}
1220
        delay = ''
1✔
1221
        retry = request.headers.get('X-AppEngine-TaskRetryCount')
1✔
1222
        if (received_at and retry in (None, '0')
1✔
1223
                and obj.type not in ('delete', 'undo')):  # we delay deletes/undos
1224
            delay_s = int((util.now().replace(tzinfo=None)
1✔
1225
                           - received_at.replace(tzinfo=None)
1226
                           ).total_seconds())
1227
            delay = f'({delay_s} s behind)'
1✔
1228
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')
1✔
1229

1230
        # check authorization
1231
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
1232
        actor = as1.get_owner(obj.as1)
1✔
1233
        if not actor:
1✔
1234
            error('Activity missing actor or author')
1✔
1235

1236
        if not (from_user_cls := obj.owner_protocol()):
1✔
1237
            error(f"couldn't determine owner protocol for {obj.key.id()} source_protocol {obj.source_protocol}", status=204)
×
1238
        elif from_user_cls.owns_id(actor) is False:
1✔
1239
            error(f"{from_user_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)
1✔
1240

1241
        assert authed_as
1✔
1242
        assert isinstance(authed_as, str)
1✔
1243
        authed_as = ids.normalize_user_id(id=authed_as, proto=from_user_cls)
1✔
1244
        actor = ids.normalize_user_id(id=actor, proto=from_user_cls)
1✔
1245
        if actor != authed_as and not internal:
1✔
1246
            report_error("Auth: receive: authed_as doesn't match owner",
1✔
1247
                         user=f'{id} authed_as {authed_as} owner {actor}')
1248
            error(f"actor {actor} isn't authed user {authed_as}")
1✔
1249

1250
        # update copy ids to originals
1251
        obj.normalize_ids()
1✔
1252
        obj.resolve_ids()
1✔
1253

1254
        if (obj.type == 'follow'
1✔
1255
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
1256
            # follows of bot user; refresh user profile first
1257
            logger.info(f'Follow of bot user, reloading {actor}')
1✔
1258
            from_user = from_user_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
1259
            from_user.reload_profile()
1✔
1260
        else:
1261
            # load actor user
1262
            from_user = from_user_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
1263

1264
        if not internal and (not from_user or from_user.manual_opt_out):
1✔
1265
            error(f"Couldn't load actor {actor}", status=204)
×
1266

1267
        # apply protocol-specific filters
1268
        if 'force' not in request.values:
1✔
1269
            for filter in from_cls.RECEIVE_FILTERS:
1✔
1270
                if filter(obj, from_user):
1✔
1271
                    error(f'Activity {id} blocked by filter {filter.__name__}')
1✔
1272

1273
        # check if this is a profile object coming in via a user with use_instead
1274
        # set. if so, override the object's id to be the final user id (from_user's),
1275
        # after following use_instead.
1276
        if obj.type in as1.ACTOR_TYPES and from_user.key.id() != actor:
1✔
1277
            as1_id = obj.as1.get('id')
1✔
1278
            if ids.normalize_user_id(id=as1_id, proto=from_user) == actor:
1✔
1279
                logger.info(f'Overriding AS1 object id {as1_id} with Object id {from_user.profile_id()}')
1✔
1280
                obj.our_as1 = {**obj.as1, 'id': from_user.profile_id()}
1✔
1281

1282
        # if this is an object, ie not an activity, wrap it in a create or update
1283
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as,
1✔
1284
                                          from_user=from_user)
1285
        obj.add('users', from_user.key)
1✔
1286

1287
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1288
        inner_obj_id = inner_obj_as1.get('id')
1✔
1289
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
1✔
1290
            if not inner_obj_id:
1✔
1291
                error(f'{obj.type} object has no id!')
1✔
1292

1293
        # check age. we support backdated posts, but if they're over 2w old, we
1294
        # don't deliver them
1295
        if obj.type == 'post':
1✔
1296
            if published := inner_obj_as1.get('published'):
1✔
1297
                try:
1✔
1298
                    published_dt = util.parse_iso8601(published)
1✔
1299
                    if not published_dt.tzinfo:
1✔
1300
                        published_dt = published_dt.replace(tzinfo=timezone.utc)
×
1301
                    age = util.now() - published_dt
1✔
1302
                    if (age > CREATE_MAX_AGE
1✔
1303
                            and 'force' not in request.values
1304
                            and not util.domain_or_parent_in(
1305
                                from_user.key.id(), CREATE_MAX_AGE_EXEMPT_DOMAINS)):
1306
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
×
1307
                              status=204)
1308
                except ValueError:  # from parse_iso8601
×
1309
                    logger.debug(f"Couldn't parse published {published}")
×
1310

1311
        # write Object to datastore
1312
        if obj.type in STORE_AS1_TYPES:
1✔
1313
            obj.put()
1✔
1314

1315
        # store inner object
1316
        # TODO: unify with big obj.type conditional below. would have to merge
1317
        # this with the DM handling block lower down.
1318
        crud_obj = None
1✔
1319
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
1✔
1320
            # normalize_ids may have converted the inner object id to a user id
1321
            # (eg Web profile URL to domain), so normalize back to the profile
1322
            # object id to find the right existing Object in the datastore
1323
            crud_obj_id = (ids.normalize_object_id(id=inner_obj_id, proto=from_cls)
1✔
1324
                           or inner_obj_id)
1325
            crud_obj = Object.get_or_create(crud_obj_id, our_as1=inner_obj_as1,
1✔
1326
                                            source_protocol=obj.source_protocol,
1327
                                            authed_as=actor, users=[from_user.key],
1328
                                            deleted=False)
1329

1330
        actor = as1.get_object(obj.as1, 'actor')
1✔
1331
        actor_id = actor.get('id')
1✔
1332

1333
        # handle activity!
1334
        if obj.type == 'stop-following':
1✔
1335
            # TODO: unify with handle_follow?
1336
            # TODO: handle multiple followees
1337
            if not actor_id or not inner_obj_id:
1✔
1338
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
×
1339

1340
            # deactivate Follower
1341
            from_ = from_user_cls.key_for(actor_id)
1✔
1342
            if not (to_cls := Protocol.for_id(inner_obj_id)):
1✔
1343
                error(f"Can't determine protocol for {inner_obj_id} , giving up")
1✔
1344
            to = to_cls.key_for(inner_obj_id)
1✔
1345
            follower = Follower.query(Follower.to == to,
1✔
1346
                                      Follower.from_ == from_,
1347
                                      Follower.status == 'active').get()
1348
            if follower:
1✔
1349
                logger.info(f'Marking {follower} inactive')
1✔
1350
                follower.status = 'inactive'
1✔
1351
                follower.put()
1✔
1352
            else:
1353
                logger.warning(f'No Follower found for {from_} => {to}')
1✔
1354

1355
            # fall through to deliver to followee
1356
            # TODO: do we convert stop-following to webmention 410 of original
1357
            # follow?
1358

1359
            # fall through to deliver to followers
1360

1361
        elif obj.type in ('delete', 'undo'):
1✔
1362
            delete_obj_id = (from_user.profile_id()
1✔
1363
                            if inner_obj_id == from_user.key.id()
1364
                            else inner_obj_id)
1365

1366
            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
1✔
1367
            if not delete_obj:
1✔
1368
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
1✔
1369
                return 'OK', 204
1✔
1370

1371
            # TODO: just delete altogether!
1372
            logger.info(f'Marking Object {delete_obj_id} deleted')
1✔
1373
            delete_obj.deleted = True
1✔
1374
            delete_obj.put()
1✔
1375

1376
            # if this is an actor, handle deleting it later so that
1377
            # in case it's from_user, user.enabled_protocols is still populated
1378
            #
1379
            # fall through to deliver to followers and delete copy if necessary.
1380
            # should happen via protocol-specific copy target and send of
1381
            # delete activity.
1382
            # https://github.com/snarfed/bridgy-fed/issues/63
1383

1384
        elif obj.type == 'block':
1✔
1385
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
1✔
1386
                # blocking protocol bot user disables that protocol
1387
                from_user.delete(proto)
1✔
1388
                from_user.disable_protocol(proto)
1✔
1389
                return 'OK', 200
1✔
1390

1391
        elif obj.type == 'move':
1✔
1392
            from_cls.handle_move(obj, from_user=from_user)
1✔
1393
            # fall through to deliver the Move activity to remaining followers
1394

1395
        elif obj.type == 'post':
1✔
1396
            # handle DMs to bot users
1397
            if as1.is_dm(obj.as1):
1✔
1398
                return dms.receive(from_user=from_user, obj=obj)
1✔
1399

1400
        # fetch actor if necessary
1401
        is_user = from_user.is_profile(orig_obj)
1✔
1402
        if (actor and actor.keys() == set(['id'])
1✔
1403
                and not is_user and obj.type not in ('delete', 'undo')):
1404
            logger.debug('Fetching actor so we have name, profile photo, etc')
1✔
1405
            actor_obj = from_user_cls.load(
1✔
1406
                ids.profile_id(id=actor['id'], proto=from_cls), raise_=False)
1407
            if actor_obj and actor_obj.as1:
1✔
1408
                obj.our_as1 = {
1✔
1409
                    **obj.as1, 'actor': {
1410
                        **actor_obj.as1,
1411
                        # override profile id with actor id
1412
                        # https://github.com/snarfed/bridgy-fed/issues/1720
1413
                        'id': actor['id'],
1414
                    }
1415
                }
1416

1417
        # fetch object if necessary
1418
        if (obj.type in ('post', 'update', 'share')
1✔
1419
                and inner_obj_as1.keys() == set(['id'])
1420
                and from_cls.owns_id(inner_obj_id) is not False):
1421
            logger.debug('Fetching inner object')
1✔
1422
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
1✔
1423
                                      remote=(obj.type in ('post', 'update')))
1424
            if obj.type in ('post', 'update'):
1✔
1425
                crud_obj = inner_obj
1✔
1426
            if inner_obj and inner_obj.as1:
1✔
1427
                obj.our_as1 = {
1✔
1428
                    **obj.as1,
1429
                    'object': {
1430
                        **inner_obj_as1,
1431
                        **inner_obj.as1,
1432
                    }
1433
                }
1434
            elif obj.type in ('post', 'update'):
1✔
1435
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")
1✔
1436

1437
        if obj.type == 'follow':
1✔
1438
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
1✔
1439
                # follow of one of our protocol bot users; enable that protocol.
1440
                # fall through so that we send an accept.
1441
                try:
1✔
1442
                    from_user.enable_protocol(proto)
1✔
1443
                except ErrorButDoNotRetryTask:
1✔
1444
                    from web import Web
1✔
1445
                    bot = Web.get_by_id(proto.bot_user_id())
1✔
1446
                    from_cls.respond_to_follow('reject', follower=from_user,
1✔
1447
                                               followee=bot, follow=obj)
1448
                    raise
1✔
1449
                proto.bot_maybe_follow_back(from_user)
1✔
1450
                from_cls.handle_follow(obj, from_user=from_user)
1✔
1451
                return 'OK', 202
1✔
1452

1453
            from_cls.handle_follow(obj, from_user=from_user)
1✔
1454

1455
        # on update of the user's own actor/profile, set user.obj and store user back
1456
        # to datastore so that we recalculate computed properties like status etc
1457
        if is_user:
1✔
1458
            if obj.type == 'update' and crud_obj:
1✔
1459
                logger.info(f"update of the user's profile, re-storing user with obj_key {crud_obj.key.id()}")
1✔
1460
                from_user.obj = crud_obj
1✔
1461
                from_user.put()
1✔
1462

1463
        # deliver to targets
1464
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)
1✔
1465

1466
        # on user deleting themselves, deactivate their followers/followings.
1467
        # https://github.com/snarfed/bridgy-fed/issues/1304
1468
        #
1469
        # do this *after* delivering because delivery finds targets based on
1470
        # stored Followers
1471
        if is_user and obj.type == 'delete':
1✔
1472
            for proto in from_user.enabled_protocols:
1✔
1473
                from_user.disable_protocol(PROTOCOLS[proto])
1✔
1474

1475
            logger.info(f'Deactivating Followers from or to {from_user.key.id()}')
1✔
1476
            followers = Follower.query(
1✔
1477
                OR(Follower.to == from_user.key, Follower.from_ == from_user.key)
1478
            ).fetch()
1479
            for f in followers:
1✔
1480
                f.status = 'inactive'
1✔
1481
            ndb.put_multi(followers)
1✔
1482

1483
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
1✔
1484
        return resp
1✔
1485

1486
    @classmethod
1✔
1487
    def handle_follow(from_cls, obj, from_user):
        """Processes an incoming follow activity.

        For each followee on a different protocol than ``from_cls``, stores an
        active :class:`Follower`, adds the followee to ``obj``'s ``notify``
        property, and sends an ``Accept`` back via :meth:`respond_to_follow`.
        The ``Follow`` itself isn't sent here; that happens in :meth:`deliver`.

        Args:
          obj (models.Object): follow activity
          from_user (models.User): user (actor) who is following
        """
        logger.debug('Got follow. storing Follow(s), sending accept(s)')
        follower_id = from_user.key.id()

        # a single follow activity may name more than one followee
        followees = as1.get_objects(obj.as1)
        if not followees:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        for followee_as1 in followees:
            followee_id = followee_as1.get('id')
            if not followee_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {follower_id} => {followee_id}')

            to_cls = Protocol.for_id(followee_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {followee_id}")
            if from_cls == to_cls:
                logger.info(f'Skipping same-protocol Follower {follower_id} => {followee_id}')
                continue

            followee_key = to_cls.key_for(followee_id)
            if not followee_key:
                logger.info(f'Skipping invalid {to_cls.LABEL} user key: {followee_id}')
                continue

            followee = to_cls.get_or_create(id=followee_key.id())
            if not (followee and followee.is_enabled(from_cls)):
                error(f'{followee_id} not found')

            # called for its side effect of storing the Follower; the returned
            # entity isn't needed here
            Follower.get_or_create(to=followee, from_=from_user, follow=obj.key,
                                   status='active')

            # record that this followee now has follower(s) on an object-feed
            # protocol
            if from_cls.USES_OBJECT_FEED:
                if from_cls.LABEL not in followee.has_object_feed_followers_on:
                    followee.has_object_feed_followers_on.append(from_cls.LABEL)
                    followee.put()

            obj.add('notify', followee_key)
            from_cls.respond_to_follow('accept', follower=from_user,
                                       followee=followee, follow=obj)
1538

1539
    @classmethod
1✔
1540
    def respond_to_follow(_, verb, follower, followee, follow):
        """Enqueues an accept or reject activity in response to a follow.

        Does nothing if ``verb`` isn't in the follower's
        ``SUPPORTED_AS1_TYPES``, ie if the follower's protocol doesn't support
        accepts/rejects.

        Args:
          verb (str): ``accept`` or ``reject``
          follower (models.User)
          followee (models.User)
          follow (models.Object)
        """
        assert verb in ('accept', 'reject')
        if verb not in follower.SUPPORTED_AS1_TYPES:
            return

        # we can only respond if the follower has a profile object that we can
        # find a delivery target for
        target = follower.target_for(follower.obj) if follower.obj else None
        if not target:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # note that this is one response for the whole follow, even if it has
        # multiple followees!
        followee_id = followee.key.id()
        response_id = f'{followee_id}/followers#{verb}-{follow.key.id()}'
        response_as1 = {
            'id': response_id,
            'objectType': 'activity',
            'verb': verb,
            'actor': followee_id,
            'object': follow.as1,
        }
        common.create_task(queue='send', id=response_id, our_as1=response_as1,
                           url=target, protocol=follower.LABEL,
                           user=followee.key.urlsafe())
1571

1572
    @classmethod
1✔
1573
    def bot_maybe_follow_back(bot_cls, user):
        """Follow a user from a protocol bot user, if their protocol needs that.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub).

        Does nothing if ``user.BOTS_FOLLOW_BACK`` is falsy, if the user has no
        profile object, or if no delivery target can be found for that profile
        object. Otherwise enqueues a ``send`` task with a follow activity from
        this protocol's bot user to ``user``.

        Args:
          user (User)
        """
        if not user.BOTS_FOLLOW_BACK:
            return

        # imported here rather than at module level; presumably to avoid a
        # circular import - TODO confirm
        from web import Web
        bot = Web.get_by_id(bot_cls.bot_user_id())
        logger.info(f'Following {user.key.id()} back from bot user {bot.key.id()}')

        if not user.obj:
            logger.info("  can't follow, user has no profile obj")
            return

        # target_for can come back empty (other callers in this file check for
        # that), so don't enqueue a send task with no URL to deliver to
        target = user.target_for(user.obj)
        if not target:
            logger.info("  can't follow, no delivery target for user's profile obj")
            return

        # timestamp makes the follow id unique per follow-back
        now = util.now().isoformat()
        follow_back_id = f'https://{bot.key.id()}/#follow-back-{user.key.id()}-{now}'
        follow_back_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_back_id,
            'actor': bot.key.id(),
            'object': user.key.id(),
        }
        common.create_task(queue='send', id=follow_back_id,
                           our_as1=follow_back_as1, url=target,
                           source_protocol='web', protocol=user.LABEL,
                           user=bot.key.urlsafe())
1607

1608
    @classmethod
1✔
1609
    def handle_move(from_cls, obj, from_user):
        """Handles an incoming move (account migration) activity.

        Updates the active :class:`Follower`s of ``from_user`` to point to the
        move's target account instead, except for followers whose kind matches
        the target's, since those would become same-protocol followers.

        Reports failure via ``error()`` if the activity has no target, if its
        object isn't its actor, or if the target's protocol or user key can't
        be determined.

        Args:
          obj (models.Object): move activity
          from_user (models.User): user (actor) this activity/object is from
        """
        target_id = as1.get_id(obj.as1, 'target')
        if not target_id:
            error(f'Move activity requires target. Got: {obj.as1}')

        logger.info(f'Got move activity from {from_user.key.id()} to {target_id}')

        # check that object is the actor (the account being moved)
        actor_id = as1.get_id(obj.as1, 'actor')
        object_id = as1.get_id(obj.as1, 'object')
        if actor_id != object_id:
            error(f"Move activity object {object_id} isn't actor {actor_id}")

        # get the target protocol and key
        to_cls = Protocol.for_id(target_id)
        if not to_cls:
            error(f"Couldn't determine protocol for target {target_id}")

        to_key = to_cls.key_for(target_id)
        if not to_key:
            error(f'Invalid {to_cls.LABEL} user key: {target_id}')

        # query for all active followers of the source account
        followers = Follower.query(
            Follower.to == from_user.key,
            Follower.status == 'active'
        ).fetch()

        # update each follower to point to the new account
        # but skip if it would create a same-protocol follower
        logger.info(f'Updating {len(followers)} followers from {actor_id} to {target_id}')
        target_kind = to_key.kind()
        updated_followers = []
        for follower in followers:
            if follower.from_.kind() == target_kind:
                logger.info(f'Skipping same-protocol follower {follower.from_} => {to_key}')
                continue

            follower.to = to_key
            updated_followers.append(follower)

        # store all of the changed Followers in one batch
        if updated_followers:
            ndb.put_multi(updated_followers)
1✔
1658

1659
    @classmethod
1✔
1660
    def handle_bare_object(cls, obj, *, authed_as, from_user):
        """Wraps a bare object in a synthetic update or post (create) activity.

        Only actors, notes, articles, and comments are wrapped; anything else
        is returned as is. Actors and changed objects are wrapped in an
        ``update``; new objects, or any object when the request has a ``force``
        value, are wrapped in a ``post``. Otherwise the object is unchanged and
        we report that via ``error()`` with status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity. (Not
            used in this method's body.)
          from_user (models.User): user (actor) this activity/object is from.
            (Not used in this method's body.)

        Returns:
          models.Object: ``obj`` if it's not a type we wrap, otherwise a new
          object for the wrapping activity
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        owner_id = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        timestamp = util.now().isoformat()

        # this is a raw post; wrap it in a create or update activity
        changed = obj.changed
        if changed or is_actor:
            logger.info(
                f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes'
                if changed
                else 'Got actor profile object, wrapping in update')

            update_id = f'{obj.key.id()}#bridgy-fed-update-{timestamp}'
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': owner_id,
                'object': {
                    # Mastodon requires the updated field for Updates, so
                    # add a default value. It comes first so that the object's
                    # own updated value, if any, overrides it.
                    # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
                    # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
                    # https://github.com/mastodon/documentation/pull/1150
                    'updated': timestamp,
                    **obj.as1,
                },
            }
            logger.debug(f'  AS1: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        if not (obj.new or 'force' in request.values):
            error(f'{obj.key.id()} is unchanged, nothing to do', status=204)

        create_id = f'{obj.key.id()}#bridgy-fed-create-{timestamp}'
        create_as1 = {
            'objectType': 'activity',
            'verb': 'post',
            'id': create_id,
            'actor': owner_id,
            'object': obj.as1,
            'published': timestamp,
        }
        logger.info('Wrapping in post')
        logger.debug(f'  AS1: {json_dumps(create_as1, indent=2)}')
        return Object(id=create_id, our_as1=create_as1,
                      source_protocol=obj.source_protocol)
×
1722

1723
    @classmethod
1✔
1724
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Delivers an activity to its external recipients.

        Finds delivery targets with :meth:`targets`, stores whichever object
        :meth:`targets` marked dirty, and enqueues one ``send`` task per target.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response; ``('OK', 202)`` if any send tasks
          were enqueued, otherwise a message with status 204
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # find delivery targets. maps Target to Object or None
        #
        # ...then write the relevant object, since targets() has a side effect of
        # setting the notify and feed properties (and dirty attribute)
        targets = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if to_proto:
            only_label = to_proto.LABEL
            targets = {target: orig for target, orig in targets.items()
                       if target.protocol == only_label}
        if not targets:
            # don't raise via error() because we call deliver in code paths where
            # we want to continue after
            msg = r'No targets, nothing to do ¯\_(ツ)_/¯'
            logger.info(msg)
            return msg, 204

        # store object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif obj.type in STORE_AS1_TYPES and obj.dirty:
            obj.put()

        # stored activity types are passed to the send task by id; everything
        # else is passed via obj.to_request()
        obj_params = ({'obj_id': obj.key.id()} if obj.type in STORE_AS1_TYPES
                      else obj.to_request())

        # sort targets so order is deterministic for tests, debugging, etc
        sorted_targets = sorted(targets.items(), key=lambda t: t[0].uri)

        # enqueue a send task for each target
        logger.info(f'Delivering to {" ".join(t.uri for t, _ in sorted_targets)}')
        user = from_user.key.urlsafe()
        for target, orig_obj in sorted_targets:
            orig_obj_id = orig_obj.key.id() if orig_obj else None
            common.create_task(queue='send', url=target.uri, protocol=target.protocol,
                               orig_obj_id=orig_obj_id, user=user, **obj_params)

        return 'OK', 202
1✔
1778

1779
    @classmethod
1✔
1780
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1781
        """Collects the targets to send a :class:`models.Object` to.
1782

1783
        Targets are both objects - original posts, events, etc - and actors.
1784

1785
        Args:
1786
          obj (models.Object)
1787
          from_user (User)
1788
          crud_obj (models.Object): if this is a create, update, or delete/undo
1789
            activity, the inner object that's being written, otherwise None.
1790
            (This object's ``notify`` and ``feed`` properties may be updated.)
1791
          internal (bool): whether this is a recursive internal call
1792

1793
        Returns:
1794
          dict: maps :class:`models.Target` to original (in response to)
1795
          :class:`models.Object`
1796
        """
1797
        logger.debug('Finding recipients and their targets')
1✔
1798

1799
        # we should only have crud_obj iff this is a create or update
1800
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1801
        write_obj = crud_obj or obj
1✔
1802
        write_obj.dirty = False
1✔
1803

1804
        target_uris = as1.targets(obj.as1)
1✔
1805
        orig_obj = None
1✔
1806
        targets = {}  # maps Target (with *normalized* uri) to Object or None
1✔
1807
        owner = as1.get_owner(obj.as1)
1✔
1808
        allow_opt_out = (obj.type == 'delete')
1✔
1809
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1810
        inner_obj_id = inner_obj_as1.get('id')
1✔
1811
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1812
        quoted_posts = as1.quoted_posts(inner_obj_as1)
1✔
1813
        mentioned_urls = as1.mentions(inner_obj_as1)
1✔
1814
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1815
        is_self_reply = False
1✔
1816

1817
        original_ids = []
1✔
1818
        if is_reply:
1✔
1819
            original_ids = in_reply_tos
1✔
1820
        elif inner_obj_id:
1✔
1821
            if inner_obj_id == from_user.key.id():
1✔
1822
                inner_obj_id = from_user.profile_id()
1✔
1823
            original_ids = [inner_obj_id]
1✔
1824

1825
        # maps id to Object
1826
        original_objs = {}
1✔
1827
        for id in original_ids:
1✔
1828
            if proto := Protocol.for_id(id):
1✔
1829
                original_objs[id] = proto.load(id, raise_=False)
1✔
1830

1831
        # for AP, add in-reply-tos' mentions
1832
        # https://github.com/snarfed/bridgy-fed/issues/1608
1833
        # https://github.com/snarfed/bridgy-fed/issues/1218
1834
        orig_post_mentions = {}  # maps mentioned id to original post Object
1✔
1835
        for id in in_reply_tos:
1✔
1836
            if ((in_reply_to_obj := original_objs.get(id))
1✔
1837
                    and (proto := PROTOCOLS.get(in_reply_to_obj.source_protocol))
1838
                    and proto.SEND_REPLIES_TO_ORIG_POSTS_MENTIONS
1839
                    and (mentions := as1.mentions(in_reply_to_obj.as1))):
1840
                logger.info(f"Adding in-reply-to {id} 's mentions to targets: {mentions}")
1✔
1841
                target_uris.extend(mentions)
1✔
1842
                for mention in mentions:
1✔
1843
                    orig_post_mentions[mention] = in_reply_to_obj
1✔
1844

1845
        target_uris = sorted(set(target_uris))
1✔
1846
        logger.info(f'Raw targets: {target_uris}')
1✔
1847

1848
        # which protocols should we allow delivering to?
1849
        to_protocols = []  # elements are Protocol subclasses
1✔
1850
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1851
                      + from_user.enabled_protocols):
1852
            if not (proto := PROTOCOLS.get(label)):
1✔
1853
                report_error(f'unknown enabled protocol {label} for {from_user.key.id()}')
1✔
1854
                continue
1✔
1855

1856
            if (obj.type == 'post' and (orig := original_objs.get(inner_obj_id))
1✔
1857
                    and orig.get_copy(proto)):
1858
                logger.info(f'Already created {id} on {label}, cowardly refusing to create there again')
1✔
1859
                continue
1✔
1860

1861
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1862
                                     or is_reply):
1863
                origs_could_bridge = None
1✔
1864

1865
                for id in original_ids:
1✔
1866
                    if not (orig := original_objs.get(id)):
1✔
1867
                        continue
1✔
1868
                    elif orig.get_copy(proto):
1✔
1869
                        logger.info(f'Allowing {label}, original {id} was bridged there')
1✔
1870
                        break
1✔
1871
                    elif from_user.is_profile(orig):
1✔
1872
                        logger.info(f"Allowing {label}, this is the user's profile")
1✔
1873
                        break
1✔
1874

1875
                    if (origs_could_bridge is not False
1✔
1876
                            and (orig_author_id := as1.get_owner(orig.as1))
1877
                            and (orig_proto := orig.owner_protocol())
1878
                            and (orig_author := orig_proto.get_by_id(orig_author_id))):
1879
                        origs_could_bridge = orig_author.is_enabled(proto)
1✔
1880

1881
                else:
1882
                    msg = f"original object(s) {original_ids} weren't bridged to {label}"
1✔
1883
                    last_retry = False
1✔
1884
                    if retries := request.headers.get(TASK_RETRIES_HEADER):
1✔
1885
                        if (last_retry := int(retries) >= TASK_RETRIES_RECEIVE):
1✔
1886
                            logger.info(f'last retry! skipping {proto.LABEL} and continuing')
1✔
1887

1888
                    if (proto.LABEL not in from_user.DEFAULT_ENABLED_PROTOCOLS
1✔
1889
                            and origs_could_bridge and not last_retry):
1890
                        # retry later; original obj may still be bridging
1891
                        # TODO: limit to brief window, eg no older than 2h? 1d?
1892
                        error(msg, status=304)
1✔
1893

1894
                    logger.info(msg)
1✔
1895
                    continue
1✔
1896

1897
            util.add(to_protocols, proto)
1✔
1898

1899
        logger.info(f'allowed protocols {[p.LABEL for p in to_protocols]}')
1✔
1900

1901
        # process direct targets
1902
        for target_id in target_uris:
1✔
1903
            target_proto = Protocol.for_id(target_id)
1✔
1904
            if not target_proto:
1✔
1905
                logger.info(f"Can't determine protocol for {target_id}")
1✔
1906
                continue
1✔
1907
            elif target_proto.is_blocklisted(target_id):
1✔
1908
                logger.debug(f'{target_id} is blocklisted')
1✔
1909
                continue
1✔
1910

1911
            target_is_actor = (target_id in mentioned_urls
1✔
1912
                               or obj.type in as1.VERBS_WITH_ACTOR_OBJECT)
1913

1914
            target_obj_id = (ids.profile_id(id=target_id, proto=target_proto)
1✔
1915
                             if target_is_actor
1916
                             # not ideal. this can sometimes be a non-user, eg
1917
                             # blocking a blocklist. ok right now since profile_id()
1918
                             # returns its input id unchanged if it doesn't look like
1919
                             # a user id, but that's brittle.
1920
                             else target_id)
1921
            orig_obj = target_proto.load(target_obj_id, raise_=False)
1✔
1922
            if not orig_obj or not orig_obj.as1:
1✔
1923
                logger.info(f"Couldn't load {target_obj_id}")
1✔
1924
                continue
1✔
1925

1926
            target_author_key = (target_proto(id=target_id).key if target_is_actor
1✔
1927
                                 else target_proto.actor_key(orig_obj))
1928

1929
            if not from_user.is_enabled(target_proto):
1✔
1930
                # if author isn't bridged and target user is, DM a prompt and
1931
                # add a notif for the target user
1932
                if (target_id in (in_reply_tos + quoted_posts + mentioned_urls)
1✔
1933
                        and target_author_key):
1934
                    if target_author := target_author_key.get():
1✔
1935
                        if target_author.is_enabled(from_cls):
1✔
1936
                            notifications.add_notification(target_author, write_obj)
1✔
1937
                            verb, noun = (
1✔
1938
                                ('replied to', 'replies') if target_id in in_reply_tos
1939
                                else ('quoted', 'quotes') if target_id in quoted_posts
1940
                                else ('mentioned', 'mentions'))
1941
                            dms.maybe_send(from_=target_proto, to_user=from_user,
1✔
1942
                                           type='replied_to_bridged_user', text=f"""\
1943
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently {verb}</a> {target_author.html_link()}, who's bridged here from {target_proto.PHRASE}. If you want them to see your {noun}, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1944

1945
                continue
1✔
1946

1947
            # deliver self-replies to followers
1948
            # https://github.com/snarfed/bridgy-fed/issues/639
1949
            if target_id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1950
                is_self_reply = True
1✔
1951
                logger.info(f'self reply!')
1✔
1952

1953
            # also add copies' targets
1954
            for copy in orig_obj.copies:
1✔
1955
                proto = PROTOCOLS[copy.protocol]
1✔
1956
                if proto in to_protocols:
1✔
1957
                    # copies generally won't have their own Objects
1958
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1959
                        target = util.normalize_url(target, trailing_slash=False)
1✔
1960
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {target_id}')
1✔
1961
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1962

1963
            if target_proto == from_cls:
1✔
1964
                logger.debug(f'Skipping same-protocol target {target_id}')
1✔
1965
                continue
1✔
1966

1967
            target = target_proto.target_for(orig_obj)
1✔
1968
            if not target:
1✔
1969
                # TODO: surface errors like this somehow?
1970
                logger.error(f"Can't find delivery target for {target_id}")
×
1971
                continue
×
1972

1973
            target = util.normalize_url(target, trailing_slash=False)
1✔
1974
            logger.debug(f'Target for {target_id} is {target} {target_author_key}')
1✔
1975

1976
            # only use orig_obj for inReplyTos, like/repost objects, reply's original
1977
            # post's mentions, etc
1978
            # https://github.com/snarfed/bridgy-fed/issues/1237
1979
            target_obj = None
1✔
1980
            if target_id in in_reply_tos + as1.get_ids(obj.as1, 'object'):
1✔
1981
                target_obj = orig_obj
1✔
1982
            elif target_id in orig_post_mentions:
1✔
1983
                target_obj = orig_post_mentions[target_id]
1✔
1984
            targets[Target(protocol=target_proto.LABEL, uri=target)] = target_obj
1✔
1985

1986
            if target_author_key:
1✔
1987
                logger.debug(f'Recipient is {target_author_key}')
1✔
1988
                if obj.type not in DONT_NOTIFY_TYPES:
1✔
1989
                    if write_obj.add('notify', target_author_key):
1✔
1990
                        write_obj.dirty = True
1✔
1991

1992
        if obj.type == 'undo':
1✔
1993
            logger.info('Object is an undo; adding targets for inner object')
1✔
1994
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
1995
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
1996
            else:
1997
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
1998
            if inner_obj:
1✔
1999
                for target, target_obj in from_cls.targets(
1✔
2000
                        inner_obj, from_user=from_user, internal=True).items():
2001
                    targets[target] = target_obj
1✔
2002
                    util.add(to_protocols, PROTOCOLS[target.protocol])
1✔
2003

2004
        if not to_protocols:
1✔
2005
            return {}
1✔
2006

2007
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
2008

2009
        # deliver to followers, if appropriate
2010
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
2011
        if not user_key:
1✔
2012
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
2013
            return targets
1✔
2014

2015
        # we deliver to HAS_COPIES protocols separately, below. we assume they have
2016
        # follower-independent targets.
2017
        to_followers_protos = [
1✔
2018
            p for p in to_protocols
2019
            if not (p.HAS_COPIES and p.DEFAULT_TARGET)
2020
            and not (p.USES_OBJECT_FEED and p.LABEL not in from_user.has_object_feed_followers_on)]
2021
        followers = []
1✔
2022
        is_undo_block = obj.type == 'undo' and inner_obj_as1.get('verb') == 'block'
1✔
2023
        if (obj.type in ('post', 'update', 'delete', 'move', 'share', 'undo')
1✔
2024
                and (not is_reply or is_self_reply) and not is_undo_block
2025
                and to_followers_protos):
2026
            logger.info(f'Delivering to followers of {user_key.id()} on {[p.LABEL for p in to_followers_protos]}')
1✔
2027
            # query each protocol individually
2028
            for proto in to_followers_protos:
1✔
2029
                kind = proto._get_kind()
1✔
2030
                for f in Follower.query(
1✔
2031
                        Follower.to == user_key,
2032
                        Follower.status == 'active',
2033
                        Follower.from_ >= ndb.Key(kind, '\x00'),
2034
                        Follower.from_ < ndb.Key(kind + '\x00', '\x00')):
2035
                    # skip protocol bot users
2036
                    if not Protocol.for_bridgy_subdomain(f.from_.id()):
1✔
2037
                        followers.append(f)
1✔
2038

2039
            logger.debug(f'  loaded {len(followers)} followers')
1✔
2040

2041
            user_keys = [f.from_ for f in followers]
1✔
2042
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
2043
            logger.debug(f'  loaded {len(users)} users')
1✔
2044

2045
            User.load_multi(users)
1✔
2046
            logger.debug(f'  loaded user objects')
1✔
2047

2048
            if (not followers and
1✔
2049
                (util.domain_or_parent_in(from_user.key.id(), LIMITED_DOMAINS)
2050
                 or util.domain_or_parent_in(obj.key.id(), LIMITED_DOMAINS))):
2051
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
2052
                return {}
1✔
2053

2054
            # add to followers' feeds, if any
2055
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
2056
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
2057
                    write_obj.feed = [u.key for u in users if u.USES_OBJECT_FEED]
1✔
2058
                    if write_obj.feed:
1✔
2059
                        write_obj.dirty = True
1✔
2060

2061
            # collect targets for followers
2062
            target_obj = (original_objs.get(inner_obj_id)
1✔
2063
                          if obj.type == 'share' else None)
2064
            for user in users:
1✔
2065
                if user.is_blocking(from_user):
1✔
2066
                    logger.debug(f'  {user.key.id()} blocks {from_user.key.id()}')
1✔
2067
                    continue
1✔
2068

2069
                # TODO: should we pass remote=False through here to Protocol.load?
2070
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
2071
                if not target:
1✔
2072
                    continue
1✔
2073

2074
                target = util.normalize_url(target, trailing_slash=False)
1✔
2075
                targets[Target(protocol=user.LABEL, uri=target)] = target_obj
1✔
2076

2077
            logger.debug(f'  collected {len(targets)} targets')
1✔
2078

2079
        # deliver to enabled HAS_COPIES protocols proactively
2080
        if obj.type in ('post', 'update', 'delete', 'share'):
1✔
2081
            for proto in to_protocols:
1✔
2082
                if proto.HAS_COPIES and proto.DEFAULT_TARGET:
1✔
2083
                    logger.info(f'user has {proto.LABEL} enabled, adding {proto.DEFAULT_TARGET}')
1✔
2084
                    targets.setdefault(
1✔
2085
                        Target(protocol=proto.LABEL, uri=proto.DEFAULT_TARGET), None)
2086

2087
        # maps string target URL to (Target, Object) tuple
2088
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
2089
        # maps Target to Object or None
2090
        targets = {}
1✔
2091
        source_domains = [
1✔
2092
            util.domain_from_link(url) for url in
2093
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
2094
            if util.is_web(url)
2095
        ]
2096
        for url in sorted(util.dedupe_urls(
1✔
2097
                candidates.keys(),
2098
                # preserve our PDS URL without trailing slash in path
2099
                # https://atproto.com/specs/did#did-documents
2100
                trailing_slash=False)):
2101
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
2102
                logger.info(f'Skipping same-domain target {url}')
×
2103
                continue
×
2104
            elif from_user.is_blocking(url):
1✔
2105
                logger.debug(f'{from_user.key.id()} blocks {url}')
1✔
2106
                continue
1✔
2107

2108
            target, obj = candidates[url]
1✔
2109
            targets[target] = obj
1✔
2110

2111
        return targets
1✔
2112

2113
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, raw=False, csv=False,
             **kwargs):
        """Loads an Object from the datastore and/or via :meth:`fetch`.

        On the returned object, :attr:`new` and :attr:`changed` are set when
        this method can determine them, ie when the datastore was consulted
        (``local`` is True) and a fetch was allowed (``remote`` is True or None).

        Args:
          id (str)
          remote (bool): True forces a fetch even if a stored copy exists, and
            the stored copy is updated. False never fetches; the stored copy
            (or None) is returned as is. None (default) fetches only when
            nothing is stored, or when the stored copy's ``updated`` is older
            than ``OBJECT_REFRESH_AGE``.
          local (bool): whether to look in the datastore first. Even when
            False, a successful fetch is still written to the datastore.
          raise_ (bool): if False, :class:`requests.RequestException`,
            :class:`HTTPException`, and :class:`InvalidStatus` from
            :meth:`fetch` are swallowed and ``None`` is returned instead
          raw (bool): if True, ``id`` is used as is instead of being passed
            through :func:`ids.normalize_object_id`. Exact meaning varies by
            subclass.
          csv (bool): whether to specifically load a CSV object
            TODO: merge this into raw, using returned Content-Type?
          kwargs: passed through to :meth:`fetch()`

        Returns:
          models.Object: the loaded object, or None if it couldn't be loaded:
          ``remote`` is False and nothing is stored, :meth:`fetch` returned a
          falsy value, ``csv`` was requested but the object isn't CSV, the
          serialized entity is larger than ``MAX_ENTITY_SIZE``, or this was
          called on the base :class:`Protocol` class with nothing stored

        Raises:
          requests.RequestException, HTTPException, InvalidStatus: whatever
            :meth:`fetch` raises, if ``raise_`` is True
        """
        assert id
        assert local or remote is not False

        if not raw:
            id = ids.normalize_object_id(id=id, proto=cls)

        obj = None
        orig_as1 = None
        if local:
            obj = Object.get_by_id(id)
            if obj:
                if csv and not obj.is_csv:
                    return None
                if obj.as1 or obj.csv or obj.raw or obj.deleted:
                    # stored copy has real content
                    obj.new = False

        if remote is False:
            return obj

        if remote is None and obj:
            is_stale = obj.updated < util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not is_stale:
                return obj
            # otherwise fall through and refresh the stored copy

        if obj:
            # remember the old contents so we can detect changes after fetching
            orig_as1 = obj.as1
            obj.our_as1 = None
            obj.new = False
        elif cls == Protocol:
            # base class, nothing stored: give up without fetching
            return None
        else:
            obj = Object(id=id)
            if local:
                # we looked in the datastore and it wasn't there
                obj.new = True
                obj.changed = False

        try:
            fetched = cls.fetch(obj, csv=csv, **kwargs)
        except (RequestException, HTTPException, InvalidStatus) as err:
            if raise_:
                raise
            util.interpret_http_exception(err)
            return None

        if not fetched or (csv and not obj.is_csv):
            return None

        # measure the serialized entity before trying to store it
        # https://stackoverflow.com/a/3042250/186123
        serialized = _entity_to_protobuf(obj)._pb.SerializeToString()
        size = len(serialized)
        if size > MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {MAX_ENTITY_SIZE}')
            return None

        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(orig_as1)

        stored_proto = obj.source_protocol
        if stored_proto not in (cls.LABEL, cls.ABBREV):
            if stored_proto:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
1✔
2219

2220
    @classmethod
    def check_supported(cls, obj, direction):
        """Rejects activities that this protocol can't handle.

        Each rejection goes through :func:`error` with status 204. The checks,
        in order:

        1. the activity type, and for CRUD verbs the inner object type, must be
           in ``SUPPORTED_AS1_TYPES``
        2. posts must have text content, an image, or an attachment stream
        3. on receive only: DMs are rejected unless this protocol has
           ``SUPPORTS_DMS`` and the recipient or owner is a protocol bot user;
           when possible, the sender is DMed back to say so
        4. on receive only: posts, updates, and actors must be public, unless
           the object's id is on one of ``NON_PUBLIC_DOMAINS``

        Objects with no type are accepted without any checks.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)
          direction (str): ``'receive'`` or  ``'send'``

        Raises:
          werkzeug.HTTPException: if this protocol doesn't support this object
        """
        assert direction in ('receive', 'send')
        if not obj.type:
            return

        inner = as1.get_object(obj.as1)
        inner_type = as1.object_type(inner) or ''

        unsupported = obj.type not in cls.SUPPORTED_AS1_TYPES
        if not unsupported and obj.type in as1.CRUD_VERBS and inner_type:
            unsupported = inner_type not in cls.SUPPORTED_AS1_TYPES
        if unsupported:
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # don't allow posts with blank content and no image/video/audio
        if obj.type in ('post', 'update'):
            crud_obj = as1.get_object(obj.as1)
        else:
            crud_obj = obj.as1

        if (crud_obj.get('objectType') in as1.POST_TYPES
                and not util.get_url(crud_obj, key='image')
                and not any(util.get_urls(crud_obj, 'attachments', inner_key='stream'))
                # TODO: handle articles with displayName but not content
                and not source.html_to_text(crud_obj.get('content')).strip()):
            error('Blank content and no image or video or audio', status=204)

        # the remaining checks only apply to incoming activities
        if direction != 'receive':
            return

        recip = as1.recipient_if_dm(obj.as1)
        if recip:
            # receiving DMs is only allowed to protocol bot accounts
            owner = as1.get_owner(obj.as1)
            if (not cls.SUPPORTS_DMS or (recip not in common.bot_user_ids()
                                         and owner not in common.bot_user_ids())):
                # reply and say DMs aren't supported
                from_proto = obj.owner_protocol()
                to_proto = Protocol.for_id(recip)
                if owner and from_proto and to_proto:
                    from_user = from_proto.get_or_create(id=owner)
                    # only look up the recipient if we got the sender
                    to_user = from_user and to_proto.get_or_create(id=recip)
                    if from_user and to_user:
                        if obj.type == 'post':
                            in_reply_to = inner.get('id')
                        else:
                            in_reply_to = obj.as1.get('id')
                        text = f"Hi! Sorry, this account is bridged from {to_user.PHRASE}, so it doesn't support DMs. Try getting in touch another way!"
                        dm_type = f'dms_not_supported-{to_user.key.id()}'
                        dms.maybe_send(from_=to_user, to_user=from_user,
                                       text=text, type=dm_type,
                                       in_reply_to=in_reply_to)

                error("Bridgy Fed doesn't support DMs", status=204)

        # check that this activity is public. only do this for some activities,
        # not eg likes or follows, since Mastodon doesn't currently mark those
        # as explicitly public.
        elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
              and not util.domain_or_parent_in(crud_obj.get('id'), NON_PUBLIC_DOMAINS)
              and not as1.is_public(obj.as1, unlisted=False)):
            error('Bridgy Fed only supports public activities', status=204)
1✔
2288

2289
    @classmethod
    def block(cls, from_user, arg):
        """Blocks a user, list, or domain blocklist on behalf of ``from_user``.

        ``arg`` is tried first as a user handle or id, then as a list (an
        object of type ``collection``), and finally handed to
        ``from_user.add_domain_blocklist``. For users and lists, a ``block``
        activity is stored and delivered via ``from_user.deliver``.

        Args:
          from_user (models.User): user doing the blocking
          arg (str): handle or id of user/list to block

        Returns:
          models.User or models.Object: user or list that was blocked, or
          whatever ``from_user.add_domain_blocklist`` returned if truthy

        Raises:
          ValueError: if arg doesn't look like a user or list on this protocol,
            or if it's on the same protocol as ``from_user``
        """
        logger.info(f'user {from_user.key.id()} trying to block {arg}')

        def reject(message):
            """Logs ``message`` as a warning, then raises it as ValueError."""
            logger.warning(message)
            raise ValueError(message)

        # first, try interpreting as a user handle or id
        blockee = None
        try:
            blockee = load_user(arg, proto=cls, create=True, allow_opt_out=True)
        except (AssertionError, AttributeError, BadRequest, RuntimeError, ValueError) as e:
            logger.info(e)

        if type(from_user) == type(blockee):
            reject(f'{blockee.html_link()} is on {from_user.PHRASE}! Try blocking them there.')

        # may not be a user, see if it's a list
        if not blockee:
            if not cls or cls == Protocol:
                cls = Protocol.for_id(arg)

            is_list = False
            if cls:
                blockee = cls.load(arg)
                is_list = bool(blockee and blockee.type == 'collection')

            if is_list:
                if blockee.source_protocol == from_user.LABEL:
                    reject(f'{blockee.html_link()} is on {from_user.PHRASE}! Try blocking it there.')
            else:
                # last resort: maybe it's a domain blocklist
                blocklist = from_user.add_domain_blocklist(arg)
                if blocklist:
                    return blocklist
                reject(f"{arg} doesn't look like a user or list{' on ' + cls.PHRASE if cls else ''}, or we couldn't fetch it")

        logger.info(f'  blocking {blockee.key.id()}')
        id = f'{from_user.key.id()}#bridgy-fed-block-{util.now().isoformat()}'
        block_activity = {
            'objectType': 'activity',
            'verb': 'block',
            'id': id,
            'actor': from_user.key.id(),
            'object': blockee.key.id(),
        }
        obj = Object(id=id, source_protocol=from_user.LABEL, our_as1=block_activity)
        obj.put()
        from_user.deliver(obj, from_user=from_user)

        return blockee
1✔
2345

2346
    @classmethod
    def unblock(cls, from_user, arg):
        """Unblocks a user, list, or domain blocklist on behalf of ``from_user``.

        ``arg`` is tried first as a user handle or id, then as a list (an
        object of type ``collection``), and finally handed to
        ``from_user.remove_domain_blocklist``. For users and lists, an ``undo``
        activity wrapping a ``block`` is stored and delivered via
        ``from_user.deliver``.

        Args:
          from_user (models.User): user doing the unblocking
          arg (str): handle or id of user/list to unblock

        Returns:
          models.User or models.Object: user or list that was unblocked, or
          whatever ``from_user.remove_domain_blocklist`` returned if truthy

        Raises:
          ValueError: if arg doesn't look like a user or list on this protocol,
            or if it's on the same protocol as ``from_user``
        """
        logger.info(f'user {from_user.key.id()} trying to unblock {arg}')

        def fail(msg):
            """Logs ``msg`` as a warning, then raises it as ValueError."""
            logger.warning(msg)
            raise ValueError(msg)

        blockee = None
        try:
            # first, try interpreting as a user handle or id
            blockee = load_user(arg, cls, create=True, allow_opt_out=True)
        except (AssertionError, AttributeError, BadRequest, RuntimeError, ValueError) as err:
            logger.info(err)

        if type(from_user) == type(blockee):
            fail(f'{blockee.html_link()} is on {from_user.PHRASE}! Try unblocking them there.')

        # may not be a user, see if it's a list
        if not blockee:
            if not cls or cls == Protocol:
                cls = Protocol.for_id(arg)

            if cls and (blockee := cls.load(arg)) and blockee.type == 'collection':
                if blockee.source_protocol == from_user.LABEL:
                    # this is the unblock path, so tell the user to *un*block,
                    # matching the user-level message above
                    fail(f'{blockee.html_link()} is on {from_user.PHRASE}! Try unblocking it there.')
            else:
                # last resort: maybe it's a domain blocklist
                if blocklist := from_user.remove_domain_blocklist(arg):
                    return blocklist
                fail(f"{arg} doesn't look like a user or list{' on ' + cls.PHRASE if cls else ''}, or we couldn't fetch it")

        logger.info(f'  unblocking {blockee.key.id()}')
        id = f'{from_user.key.id()}#bridgy-fed-unblock-{util.now().isoformat()}'
        obj = Object(id=id, source_protocol=from_user.LABEL, our_as1={
            'objectType': 'activity',
            'verb': 'undo',
            'id': id,
            'actor': from_user.key.id(),
            # the block activity being undone
            'object': {
                'objectType': 'activity',
                'verb': 'block',
                'actor': from_user.key.id(),
                'object': blockee.key.id(),
            },
        })
        obj.put()
        from_user.deliver(obj, from_user=from_user)

        return blockee
1✔
2406

2407

2408
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Builds the object with :meth:`models.Object.from_request`, marks it as new,
    and hands it to :meth:`Protocol.receive` on its source protocol. The
    activity is treated as internal when ``authed_as`` is ``PRIMARY_DOMAIN``
    or one of ``PROTOCOL_DOMAINS``.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    :class:`requests.RequestException`, :class:`RuntimeError`, and
    :class:`ValueError` from :meth:`Protocol.receive` are passed to
    :func:`error` with status 304.

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    form = request.form.to_dict()

    authed_as = form.pop('authed_as', None)
    internal = authed_as == PRIMARY_DOMAIN or authed_as in PROTOCOL_DOMAINS

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    received_at = form.pop('received_at', None)
    if received_at:
        received_at = datetime.fromisoformat(received_at)

    try:
        from_proto = PROTOCOLS[obj.source_protocol]
        return from_proto.receive(obj=obj, authed_as=authed_as,
                                  internal=internal, received_at=received_at)
    except (RequestException, RuntimeError, ValueError) as e:
        # network errors get interpreted, everything else gets logged
        if isinstance(e, RequestException):
            util.interpret_http_exception(e)
        else:
            logger.warning(e, exc_info=True)
        error(e, status=304)
×
2451

2452

2453
@cloud_tasks_only(log=None)
def send_task():
    """Task handler that sends one activity to one destination.

    Calls :meth:`Protocol.send` on the destination protocol with the object
    built from the form parameters.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns:
      (str, int) tuple: empty body and HTTP status. 200 if
      :meth:`Protocol.send` returned a truthy value; 204 if ``protocol`` or
      ``url`` is missing, if it returned False, or if a memcache error made us
      re-enqueue the task; 304 otherwise, eg if it raised an exception that
      :func:`util.interpret_http_exception` recognized.
    """
    common.log_request()

    # prepare
    form = request.form.to_dict()
    url = form.get('url')
    protocol = form.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    target = Target(uri=url, protocol=protocol)
    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    to_proto = PROTOCOLS[protocol]
    to_proto.check_supported(obj, 'send')
    allow_opt_out = (obj.type == 'delete')

    user = None
    user_key = form.get('user')
    if user_key:
        key = ndb.Key(urlsafe=user_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        user = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send
    delay = ''
    first_attempt = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if first_attempt and obj.created:
        # how long after the object was created we got to this first attempt
        elapsed = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(elapsed.total_seconds())
        delay = f'({delay_s} s behind)'
    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = to_proto.send(obj, url, from_user=user,
                             orig_obj_id=form.get('orig_obj_id'))
    except (MemcacheServerError, MemcacheUnexpectedCloseError,
            MemcacheUnknownError) as e:
        # our memorystore instance is probably undergoing maintenance. re-enqueue
        # task with a delay.
        # https://docs.cloud.google.com/memorystore/docs/memcached/about-maintenance
        report_error(f'memcache error on send task, re-enqueuing in {MEMCACHE_DOWN_TASK_DELAY}: {e}')
        common.create_task(queue='send', delay=MEMCACHE_DOWN_TASK_DELAY, **form)
        sent = False
    except BaseException as e:
        # only swallow exceptions that interpret_http_exception recognizes
        code, body = util.interpret_http_exception(e)
        if not code and not body:
            raise

    if sent:
        status = 200
    elif sent is False:
        logger.info(f'Failed sending!')
        status = 204
    else:
        status = 304

    return '', status
1✔
2523

2524

2525
@cloud_tasks_only(log=None)
def user_enabled_task():
    r"""Task handler for when a user enables a protocol.

    DMs any dormant :class:`models.Follower`\s pointing at the user, from users
    on the newly enabled protocol, to let them know the user is now bridged, so
    they can follow them for real, and flips those ``Follower``\s from
    ``dormant`` to ``inactive``.

    Followers whose from user is missing or has a non-empty ``status`` are
    left untouched.

    Parameters:
      user (url-safe google.cloud.ndb.key.Key): the :class:`models.User` who
        enabled bridging
      protocol (str): ``LABEL`` of the protocol they enabled

    Returns:
      (str, int) tuple: empty body and HTTP 200

    Raises:
      ErrorButDoNotRetryTask: if the user has a non-empty ``status``
    """
    common.log_request()

    proto = PROTOCOLS[request.form['protocol']]
    user = ndb.Key(urlsafe=request.form['user']).get()
    assert user
    logger.info(f'{user.key.id()} is {user.status or "ok"}')
    if user.status:
        raise ErrorButDoNotRetryTask()

    # only followers on the newly enabled protocol are relevant. filter the
    # Follower entities themselves, not just their keys, so that followers and
    # from_users stay index-aligned for the zip below.
    kind = proto._get_kind()
    followers = [
        f for f in Follower.query(Follower.to == user.key,
                                  Follower.status == 'dormant').fetch()
        if f.from_.kind() == kind
    ]
    from_users = ndb.get_multi([f.from_ for f in followers])

    for follower, from_user in zip(followers, from_users):
        if from_user and not from_user.status:
            logger.info(f'Updating and DMing Follower from {from_user.key.id()}')
            follower.status = 'inactive'
            follower.put()

            # extra clause explaining how the recipient knows this user, if
            # the Follower recorded a reason
            relationship = {
                'bounce': ', who you originally followed before you Bounced,',
                'requested': ', who you asked to bridge,',
            }.get(follower.reason, '')
            dms.maybe_send(from_=proto, to_user=from_user, text=f'<p>Hi! {user.html_link(proto=proto, proto_fallback=True)}{relationship} has bridged their account into {proto.PHRASE}. You can follow them now if you want.')

    return '', 200
1✔
2565

2566

2567
@cloud_tasks_only(log=None)
def migrate_out_task():
    """Task handler that finishes a user's migration out.

    When the destination protocol is ATProto, loads the auth entity and calls
    :meth:`ATProto.migrate_out_blobs` with it. For any other destination
    protocol, does nothing.

    Parameters:
      user (str, url-safe ndb.Key of a User): the bridged :class:`models.User`
        migrating out
      protocol (str): destination protocol
      auth (optional url-safe ndb.Key of an oauth-dropins auth entity): the user's
        new account. For ATProto, an :class:`oauth_dropins.bluesky.BlueskyAuth`.
        Only read when the destination protocol is ATProto.

    Returns:
      (str, int) tuple: empty body and HTTP 200

    Raises:
      ErrorButDoNotRetryTask: if the user entity doesn't exist
    """
    from atproto import ATProto

    common.log_request()

    user_key = ndb.Key(urlsafe=request.form['user'])
    user = user_key.get()
    if not user:
        raise ErrorButDoNotRetryTask()

    to_atproto = request.form['protocol'] == ATProto.LABEL
    if to_atproto:
        auth_key = ndb.Key(urlsafe=request.form['auth'])
        auth = auth_key.get()
        assert auth
        ATProto.migrate_out_blobs(user, auth)

    return '', 200
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc