• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 1f090e4b-ac83-4a0b-9d95-4f730361e684

07 Aug 2025 09:09PM UTC coverage: 92.591% (+0.02%) from 92.568%
1f090e4b-ac83-4a0b-9d95-4f730361e684

push

circleci

snarfed
activitypub.postprocess_as2: for replies, add original post's mention tags

fixes https://github.com/snarfed/bridgy-fed/issues/1608

1 of 1 new or added line in 1 file covered. (100.0%)

54 existing lines in 5 files now uncovered.

5586 of 6033 relevant lines covered (92.59%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.25
/protocol.py
1
"""Base protocol class and common code."""
2
import copy
1✔
3
from datetime import datetime, timedelta, timezone
1✔
4
import logging
1✔
5
import os
1✔
6
import re
1✔
7
from threading import Lock
1✔
8
from urllib.parse import urljoin, urlparse
1✔
9

10
from cachetools import cached, LRUCache
1✔
11
from flask import request
1✔
12
from google.cloud import ndb
1✔
13
from google.cloud.ndb import OR
1✔
14
from google.cloud.ndb.model import _entity_to_protobuf
1✔
15
from granary import as1, as2, source
1✔
16
from granary.source import html_to_text
1✔
17
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
18
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
19
from oauth_dropins.webutil import models
1✔
20
from oauth_dropins.webutil import util
1✔
21
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
22
from requests import RequestException
1✔
23
import werkzeug.exceptions
1✔
24
from werkzeug.exceptions import BadGateway, HTTPException
1✔
25

26
import common
1✔
27
from common import (
1✔
28
    DOMAIN_BLOCKLIST,
29
    DOMAIN_RE,
30
    DOMAINS,
31
    ErrorButDoNotRetryTask,
32
    PRIMARY_DOMAIN,
33
    PROTOCOL_DOMAINS,
34
    report_error,
35
    subdomain_wrap,
36
)
37
import dms
1✔
38
import ids
1✔
39
from ids import (
1✔
40
    BOT_ACTOR_AP_IDS,
41
    normalize_user_id,
42
    translate_object_id,
43
    translate_user_id,
44
)
45
import memcache
1✔
46
from models import (
1✔
47
    DM,
48
    Follower,
49
    Object,
50
    PROTOCOLS,
51
    PROTOCOLS_BY_KIND,
52
    Target,
53
    User,
54
)
55
import notifications
1✔
56

57
# module-level time spans; they're consumed outside of this part of the file
OBJECT_REFRESH_AGE = timedelta(days=30)
CREATE_MAX_AGE = timedelta(weeks=2)
DELETE_TASK_DELAY = timedelta(minutes=2)

# require a follow for users on these domains before we deliver anything from
# them other than their profile.
#
# the whitespace-separated LIMITED_DOMAINS environment variable wins if it's
# non-empty; otherwise we fall back to the limited_domains file.
LIMITED_DOMAINS = (
    os.getenv('LIMITED_DOMAINS', '').split()
    or util.load_file_lines('limited_domains')
)

# AS1 types/verbs that we don't store: all CRUD verbs, plus these
DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | {
    'accept',
    'reject',
    'stop-following',
    'undo',
}

# NOTE(review): binary - binds tighter than |, so DONT_STORE_AS1_TYPES is only
# subtracted from as1.VERBS_WITH_OBJECT, not from the whole union. the
# parentheses below just spell out how Python already parses this expression.
# confirm that's intended.
STORE_AS1_TYPES = (
    as1.ACTOR_TYPES
    | as1.POST_TYPES
    | (as1.VERBS_WITH_OBJECT - DONT_STORE_AS1_TYPES)
)

logger = logging.getLogger(__name__)
1✔
76

77

78
def error(*args, status=299, **kwargs):
    """Thin wrapper around :func:`common.error` with a different default status.

    The HTTP status code defaults to 299 so that the task isn't retried.

    Args:
      args: passed through to :func:`common.error`
      status (int): HTTP status code
      kwargs: passed through to :func:`common.error`

    Returns:
      whatever :func:`common.error` returns
    """
    # status is keyword-only here, so it can never already be in kwargs
    kwargs['status'] = status
    return common.error(*args, **kwargs)
1✔
81

82

83
def activity_id_memcache_key(id):
    """Returns the memcache key for a received activity.

    Args:
      id (str): activity id

    Returns:
      the result of :func:`memcache.key` for ``receive-[id]``
    """
    unprocessed = f'receive-{id}'
    return memcache.key(unprocessed)
1✔
85

86

87
class Protocol:
1✔
88
    """Base protocol class. Not to be instantiated; classmethods only."""
89
    ABBREV = None
1✔
90
    """str: lower case abbreviation, used in URL paths"""
1✔
91
    PHRASE = None
1✔
92
    """str: human-readable name or phrase. Used in phrases like ``Follow this person on {PHRASE}``"""
1✔
93
    OTHER_LABELS = ()
1✔
94
    """sequence of str: label aliases"""
1✔
95
    LOGO_HTML = ''
1✔
96
    """str: logo emoji or ``<img>`` tag"""
1✔
97
    CONTENT_TYPE = None
1✔
98
    """str: MIME type of this protocol's native data format, appropriate for the ``Content-Type`` HTTP header."""
1✔
99
    HAS_COPIES = False
1✔
100
    """bool: whether this protocol is push and needs us to proactively create "copy" users and objects, as opposed to pulling converted objects on demand"""
1✔
101
    DEFAULT_TARGET = None
1✔
102
    """str: optional, the default target URI to send this protocol's activities to. May be used as the "shared" target. Often only set if ``HAS_COPIES`` is true."""
1✔
103
    REQUIRES_AVATAR = False
1✔
104
    """bool: whether accounts on this protocol are required to have a profile picture. If they don't, their ``User.status`` will be ``blocked``."""
1✔
105
    REQUIRES_NAME = False
1✔
106
    """bool: whether accounts on this protocol are required to have a profile name that's different than their handle or id. If they don't, their ``User.status`` will be ``blocked``."""
1✔
107
    REQUIRES_OLD_ACCOUNT = False
1✔
108
    """bool: whether accounts on this protocol are required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their profile includes creation date and it's not old enough, their ``User.status`` will be ``blocked``."""
1✔
109
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
110
    """sequence of str: labels of other protocols that are automatically enabled for this protocol to bridge into"""
1✔
111
    DEFAULT_SERVE_USER_PAGES = False
1✔
112
    """bool: whether to serve user pages for all of this protocol's users on the fed.brid.gy. If ``False``, user pages will only be served for users who have explictly opted in."""
1✔
113
    SUPPORTED_AS1_TYPES = ()
1✔
114
    """sequence of str: AS1 objectTypes and verbs that this protocol supports receiving and sending"""
1✔
115
    SUPPORTS_DMS = False
1✔
116
    """bool: whether this protocol can receive DMs (chat messages)"""
1✔
117
    USES_OBJECT_FEED = False
1✔
118
    """bool: whether to store followers on this protocol in :attr:`Object.feed`."""
1✔
119
    HTML_PROFILES = True
1✔
120
    """bool: whether this protocol supports HTML in profile descriptions. If False, profile descriptions should be plain text."""
1✔
121

122
    def __init__(self):
        """Always fails: this class is classmethods only, not to be instantiated.

        Raises:
          AssertionError: always
        """
        # raise explicitly instead of ``assert False`` so that this guard still
        # fires when Python runs with -O, which strips assert statements
        raise AssertionError(
            'Protocol is not to be instantiated; use its classmethods')
×
124

125
    @classmethod
1✔
126
    @property
1✔
127
    def LABEL(cls):
        """str: human-readable lower case name of this protocol, eg ``'activitypub'``

        Derived from the class name.
        """
        # NOTE(review): this is exposed by stacking @classmethod on top of
        # @property. that chaining was deprecated in Python 3.11 and removed in
        # 3.13, so confirm which Python versions this needs to run on.
        class_name = cls.__name__
        return class_name.lower()
1✔
130

131
    @staticmethod
1✔
132
    def for_request(fed=None):
        """Returns the protocol that the current request is for.

        Looks at the current Flask request's hostname and delegates to
        :meth:`for_bridgy_subdomain`.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request hostname is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        host = request.host
        return Protocol.for_bridgy_subdomain(host, fed=fed)
1✔
146

147
    @staticmethod
1✔
148
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Maps a brid.gy subdomain, or a URL on one, to its protocol.

        Args:
          domain_or_url (str): bare domain, or web URL whose domain is used
          fed (str or protocol.Protocol): protocol (or its label) to return
            if the domain is the primary domain or a local domain

        Returns:
          class: :class:`Protocol` subclass, or None if the domain isn't the
          primary domain, a local domain, or a known protocol's subdomain
        """
        if util.is_web(domain_or_url):
            domain = util.domain_from_link(domain_or_url, minimize=False)
        else:
            domain = domain_or_url

        # primary and local domains aren't protocol specific; defer to fed
        if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        # protocol subdomain: whatever precedes the superdomain is the label
        if domain and domain.endswith(common.SUPERDOMAIN):
            return PROTOCOLS.get(domain.removesuffix(common.SUPERDOMAIN))

        return None
1✔
170

171
    @classmethod
1✔
172
    def owns_id(cls, id):
        """Returns whether this protocol owns the id, or None if it's unclear.

        To be implemented by subclasses. This base implementation always
        returns False.

        An id is a string that uniquely identifies a user and is meant
        primarily for machines. Handles, by contrast, are chosen by humans,
        meaningful to humans, and often but not always unique.

        For some protocols, the id format alone more or less settles
        ownership, eg ``at://`` URIs belong to AT Protocol. Other formats are
        ambiguous, eg an http(s) URL could belong to Web or ActivityPub.

        Implementations should make a quick guess with no expensive side
        effects: no external HTTP fetches of the id itself, and no other
        discovery.

        Implementations should return False if the id's domain is in
        :const:`common.DOMAIN_BLOCKLIST`.

        Args:
          id (str)

        Returns:
          bool or None:
        """
        return False
1✔
198

199
    @classmethod
1✔
200
    def owns_handle(cls, handle, allow_internal=False):
        """Returns whether this protocol owns the handle, or None if it's unclear.

        To be implemented by subclasses. This base implementation always
        returns False and ignores ``allow_internal``.

        A handle is a string identity that's chosen by a human, meaningful to
        humans, and often but not always unique. Ids, by contrast, uniquely
        identify users and are meant primarily for machines.

        For some protocols, the handle format alone more or less settles
        ownership, eg ActivityPub (technically WebFinger) handles look like
        ``@user@instance.com``. Other formats are ambiguous, eg a domain could
        belong to Web, ActivityPub, AT Protocol, or others.

        Implementations should make a quick guess with no expensive side
        effects: no external HTTP fetches and no other discovery.

        Args:
          handle (str)
          allow_internal (bool): governs how internal domains like
            ``fed.brid.gy``, ``bsky.brid.gy``, etc are treated; the exact
            semantics are up to the subclass

        Returns:
          bool or None
        """
        return False
1✔
227

228
    @classmethod
1✔
229
    def handle_to_id(cls, handle):
        """Resolves a handle to its id.

        To be implemented by subclasses; this base implementation always
        raises.

        Implementations may make network requests, eg DNS queries or HTTP
        requests, and should avoid blocked or opted out users.

        Args:
          handle (str)

        Returns:
          str: corresponding id, or None if the handle can't be found

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
244

245
    @classmethod
1✔
246
    def key_for(cls, id, allow_opt_out=False):
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

        If called as ``Protocol.key_for``, first infers the protocol with
        :meth:`for_id` and then delegates to that protocol's ``key_for``. If
        called on a concrete subclass, uses that subclass as is.

        If a user is already stored for this id, its key is returned, so the
        result may differ from a key built directly from ``id``.

        Args:
          id (str):
          allow_opt_out (bool): whether to allow users who are currently opted out

        Returns:
          google.cloud.ndb.Key: matching key, or None if no protocol could be
          inferred for the id, or if the stored user has a ``status`` set and
          ``allow_opt_out`` is false
        """
        if cls == Protocol:
            inferred = Protocol.for_id(id)
            if not inferred:
                return None
            return inferred.key_for(id, allow_opt_out=allow_opt_out)

        # load user so that we follow use_instead. always include opted out
        # users here; allow_opt_out is applied just below.
        user = cls.get_by_id(id, allow_opt_out=True)
        if not user:
            # nothing stored yet; build the key from a new, unsaved entity
            return cls(id=id).key

        if user.status and not allow_opt_out:
            return None

        return user.key
1✔
275

276
    @staticmethod
1✔
277
    def _for_id_memcache_key(id, remote=None):
        """Generates the memcache key for a :meth:`for_id` call.

        Ids on one of our own protocol domains are keyed on the full id. Other
        web URLs are keyed on just their domain, and only when ``remote`` is
        truthy. Anything else gets no key.

        Args:
          id (str)
          remote (bool): the ``remote`` argument from the :meth:`for_id` call

        Returns:
          str: ``id`` itself or its domain, or None for no key
        """
        domain = util.domain_from_link(id)

        if domain in PROTOCOL_DOMAINS:
            return id

        if remote and util.is_web(id):
            return domain

        return None
1✔
291

292
    @cached(LRUCache(20000), lock=Lock())
1✔
293
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote=True: remote,
1✔
294
                      version=3)
295
    @staticmethod
1✔
296
    def for_id(id, remote=True):
        """Determines which protocol a given id belongs to.

        Tries these steps in order and returns at the first one that's
        conclusive:

        1. our own per-protocol subdomains
        2. each protocol's :meth:`owns_id`
        3. an existing :class:`Object` in the datastore
        4. only if ``remote`` is true: loading the id over the network with
           each protocol that didn't rule it out in step 2

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.debug(f'Determining protocol for id {id}')
        if not id:
            return None

        # remove our synthetic id fragment, if any
        #
        # will this eventually cause false positives for other services that
        # include our full ids inside their own ids, non-URL-encoded? guess
        # we'll figure that out if/when it happens.
        id, _, _ = id.partition('#bridgy-fed-')
        if not id:
            return None

        # step 1: check for our per-protocol subdomains
        if util.is_web(id):
            try:
                url_parts = urlparse(id)
            except ValueError as err:
                logger.info(f'urlparse ValueError: {err}')
                return None

            path = url_parts.path
            is_homepage = path.strip('/') == ''
            is_internal = path.startswith(ids.INTERNAL_PATH_PREFIX)
            by_subdomain = Protocol.for_bridgy_subdomain(id)
            if by_subdomain:
                # homepages, internal paths, and bot actors are on our
                # subdomains but aren't settled by the subdomain alone
                if (not is_homepage and not is_internal
                        and id not in BOT_ACTOR_AP_IDS):
                    logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
                    return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it
        # sort to be deterministic
        known = {p for p in PROTOCOLS.values() if p}
        candidates = []
        for protocol in sorted(known, key=lambda p: p.LABEL):
            owns = protocol.owns_id(id)
            if owns:
                logger.debug(f'  {protocol.LABEL} owns id {id}')
                return protocol
            if owns is not False:
                # neither yes nor a definite no; still in the running
                candidates.append(protocol)

        if len(candidates) == 1:
            sole = candidates[0]
            logger.debug(f'  {sole.LABEL} owns id {id}')
            return sole

        # step 3: look for existing Objects in the datastore
        #
        # note that we don't currently see if this is a copy id because I have FUD
        # over which Protocol for_id should return in that case...and also because a
        # protocol may already say definitively above that it owns the id, eg ATProto
        # with DIDs and at:// URIs.
        stored = Protocol.load(id, remote=False)
        if stored and stored.source_protocol:
            logger.debug(f'  {stored.key.id()} owned by source_protocol {stored.source_protocol}')
            return PROTOCOLS[stored.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for protocol in candidates:
            logger.debug(f'Trying {protocol.LABEL}')
            try:
                fetched = protocol.load(id, local=False, remote=True)

                if protocol.ABBREV == 'web':
                    # for web, if we fetch and get HTML without microformats,
                    # load returns False but the object will be stored in the
                    # datastore with source_protocol web, and in cache. load it
                    # again manually to check for that.
                    fetched = Object.get_by_id(id)
                    if fetched and fetched.source_protocol != 'web':
                        fetched = None

                if fetched:
                    logger.debug(f'  {protocol.LABEL} owns id {id}')
                    return protocol

            # BadGateway is itself an HTTPException, so it has to come first
            except BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except HTTPException:
                # internal error we generated ourselves; try next protocol
                continue
            except Exception as err:
                code, _ = util.interpret_http_exception(err)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
1✔
402

403
    @cached(LRUCache(20000), lock=Lock())
1✔
404
    @staticmethod
1✔
405
    def for_handle(handle):
        """Determines which protocol a given handle belongs to.

        Tries these steps in order and returns at the first one that's
        conclusive:

        1. each protocol's :meth:`owns_handle`
        2. an existing user with this handle in the datastore
        3. resolving the handle with each remaining protocol's
           :meth:`handle_to_id`

        May incur expensive side effects like resolving the handle itself over
        the network or other discovery.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle,
          or if the matching stored user has a ``status`` set
        """
        # TODO: normalize, eg convert domains to lower case
        logger.debug(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: check if any Protocols say conclusively that they own it.
        # sort to be deterministic.
        known = {p for p in PROTOCOLS.values() if p}
        candidates = []
        for proto in sorted(known, key=lambda p: p.LABEL):
            owns = proto.owns_handle(handle)
            if owns:
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)
            if owns is not False:
                # neither yes nor a definite no; still in the running
                candidates.append(proto)

        if len(candidates) == 1:
            sole = candidates[0]
            logger.debug(f'  {sole.LABEL} owns handle {handle}')
            return (sole, None)

        # step 2: look for matching User in the datastore
        for proto in candidates:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue
            if user.status:
                return (None, None)
            logger.debug(f'  user {user.key} handle {handle}')
            return (proto, user.key.id())

        # step 3: resolve handle to id
        for proto in candidates:
            resolved_id = proto.handle_to_id(handle)
            if resolved_id:
                logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {resolved_id}')
                return (proto, resolved_id)

        logger.info(f'No matching protocol found for handle {handle} !')
        return (None, None)
1✔
458

459
    @classmethod
1✔
460
    def is_user_at_domain(cls, handle, allow_internal=False):
        """Returns True if handle is formatted ``user@domain.tld``, False otherwise.

        The handle must contain exactly one ``@``, with non-empty text on both
        sides, and the domain must not be blocklisted.

        Example: ``user@instance.com``. Note that a leading ``@``, eg
        ``@user@instance.com``, makes two ``@`` characters, so it returns False.

        Args:
          handle (str)
          allow_internal (bool): whether the domain can be a Bridgy Fed domain;
            passed through to :meth:`is_blocklisted`

        Returns:
          bool
        """
        if handle.count('@') != 1:
            return False

        user, _, domain = handle.partition('@')
        if not user or not domain:
            return False

        return not cls.is_blocklisted(domain, allow_internal=allow_internal)
476

477
    @classmethod
1✔
478
    def bridged_web_url_for(cls, user, fallback=False):
        """Returns the web URL for a user's bridged profile in this protocol.

        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``

        This base implementation has no canonical URL of its own; it only ever
        returns the fallback.

        Args:
          user (models.User)
          fallback (bool): if True, and bridged users have no canonical user
            profile URL in this protocol, return the native protocol's profile URL

        Returns:
          str, or None if there isn't a canonical URL
        """
        if not fallback:
            return None

        return user.web_url()
1✔
494

495
    @classmethod
1✔
496
    def actor_key(cls, obj, allow_opt_out=False):
        """Returns the :class:`User` key for a given object's author or actor.

        The owner is extracted from the object's AS1 with
        :func:`granary.as1.get_owner` and then converted with :meth:`key_for`.

        Args:
          obj (models.Object)
          allow_opt_out (bool): whether to return a user key if they're opted out

        Returns:
          google.cloud.ndb.key.Key or None: None if the object has no owner,
          or if :meth:`key_for` returns None
        """
        owner_id = as1.get_owner(obj.as1)
        if not owner_id:
            return None

        return cls.key_for(owner_id, allow_opt_out=allow_opt_out)
1✔
509

510
    @classmethod
1✔
511
    def bot_user_id(cls):
        """Returns the Web user id for this protocol's bot user.

        It's this protocol's ``ABBREV`` followed by
        :const:`common.SUPERDOMAIN`, eg ``'bsky.brid.gy'`` for ATProto.

        Returns:
          str:
        """
        abbrev = cls.ABBREV
        superdomain = common.SUPERDOMAIN
        return f'{abbrev}{superdomain}'
1✔
520

521
    @classmethod
1✔
522
    def create_for(cls, user):
        """Creates or re-activates a copy user in this protocol.

        To be implemented by subclasses; this base implementation always
        raises ``NotImplementedError``.

        Implementations should add the copy user to :attr:`copies`. If the
        copy user already exists and is active, they should do nothing.

        Args:
          user (models.User): original source user. Shouldn't already have a
            copy user for this protocol in :attr:`copies`.

        Raises:
          ValueError: if we can't create a copy of the given user in this protocol
        """
        raise NotImplementedError()
×
537

538
    @classmethod
1✔
539
    def send(to_cls, obj, target, from_user=None, orig_obj_id=None):
        """Sends an outgoing activity.

        To be implemented by subclasses; this base implementation always
        raises ``NotImplementedError``. Implementations should call
        ``to_cls.translate_ids(obj.as1)`` before converting the activity to
        this Protocol's format.

        NOTE: if this protocol's ``HAS_COPIES`` is True, and the
        implementation creates a copy and sends it, it *must* add that copy to
        the *object*'s (not activity's) :attr:`copies`, and store it back in
        the datastore!

        Args:
          obj (models.Object): with activity to send
          target (str): destination URL to send to
          from_user (models.User): user (actor) this activity is from
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
            that this object refers to, eg replies to or reposts or likes

        Returns:
          bool: True if the activity is sent successfully, False if it is
          ignored or otherwise unsent due to protocol logic, eg no webmention
          endpoint, protocol doesn't support the activity type. (Failures are
          raised as exceptions.)

        Raises:
          werkzeug.HTTPException if the request fails
        """
        raise NotImplementedError()
×
567

568
    @classmethod
1✔
569
    def fetch(cls, obj, **kwargs):
        """Fetches a protocol-specific object and populates it in an :class:`Object`.

        To be implemented by subclasses; this base implementation always
        raises ``NotImplementedError``.

        Implementations raise errors as exceptions. A False return value means
        the fetch neither failed nor succeeded, eg the id isn't valid for this
        protocol, or the fetch didn't return valid data for this protocol.

        Args:
          obj (models.Object): with the id to fetch. Data is filled into one of
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
          kwargs: subclass-specific

        Returns:
          bool: True if the object was fetched and populated successfully,
          False otherwise

        Raises:
          requests.RequestException, werkzeug.HTTPException,
          websockets.WebSocketException, etc: if the fetch fails
        """
        raise NotImplementedError()
×
592

593
    @classmethod
1✔
594
    def convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
        and ``application/activity+json`` for :class:`ActivityPub`.

        Just passes through to :meth:`_convert`, plus minor
        protocol-independent preprocessing: bridged actors get source links
        added to their bios via :meth:`add_source_links`. That temporarily
        overwrites ``obj.our_as1``, which is always restored before returning,
        even if conversion raises.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific, passed through to :meth:`_convert`

        Returns:
          converted object in the protocol's native format, often a dict.
          ``{}`` if ``obj`` is empty or has no AS1.
        """
        if not obj or not obj.as1:
            return {}

        obj_id = obj.key.id() if obj.key else obj.as1.get('id')
        is_activity = obj.as1.get('verb') in as1.CRUD_VERBS
        base_obj = as1.get_object(obj.as1) if is_activity else obj.as1
        orig_our_as1 = obj.our_as1

        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
        #
        # NOTE(review): for_bridgy_subdomain returns a Protocol class or None.
        # if DOMAINS holds domain strings, the ``not in DOMAINS`` check below
        # is always true. confirm what was intended; left unchanged here.
        add_links = (
            from_user and base_obj
            and base_obj.get('objectType') in as1.ACTOR_TYPES
            and PROTOCOLS.get(obj.source_protocol) != cls
            and Protocol.for_bridgy_subdomain(obj_id) not in DOMAINS
            # Web users are special cased, they don't get the label if they've
            # explicitly enabled Bridgy Fed with redirects or webmentions
            and not (from_user.LABEL == 'web'
                     and (from_user.last_webmention_in or from_user.has_redirects)))

        try:
            if add_links:
                cls.add_source_links(obj=obj, from_user=from_user)
            return cls._convert(obj, from_user=from_user, **kwargs)
        finally:
            # add_source_links replaces obj.our_as1. always put the original
            # back, even on failure, so the caller's object isn't left modified.
            obj.our_as1 = orig_our_as1
1✔
634

635
    @classmethod
1✔
636
    def _convert(cls, obj, from_user=None, **kwargs):
        """Does the protocol-specific work of :meth:`convert`.

        To be implemented by subclasses; this base implementation always
        raises ``NotImplementedError``. Implementations should generally call
        :meth:`Protocol.translate_ids` (as their own class) before converting
        to their format.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific

        Returns:
          converted object in the protocol's native format, often a dict. May
            return the ``{}`` empty dict if the object can't be converted.
        """
        raise NotImplementedError()
×
653

654
    @classmethod
1✔
655
    def add_source_links(cls, obj, from_user):
        """Adds "bridged from ... by Bridgy Fed" to the user's actor's ``summary``.

        Uses HTML for protocols that support it (``HTML_PROFILES``), plain text
        otherwise.

        Replaces ``obj.our_as1`` with a deep copy of ``obj.as1`` and then
        modifies the actor: sets its ``objectType`` to ``person`` and appends
        the source links to its ``summary``. Does nothing more if the summary
        already contains the "🌉 bridged" marker.

        Args:
          obj (models.Object): user's actor/profile object
          from_user (models.User): user (actor) this activity/object is from
        """
        assert obj and obj.as1
        assert from_user

        obj.our_as1 = copy.deepcopy(obj.as1)
        actor = (as1.get_object(obj.as1) if obj.as1.get('verb') in as1.CRUD_VERBS
                 else obj.as1)
        actor['objectType'] = 'person'

        orig_summary = actor.setdefault('summary', '')
        summary_text = html_to_text(orig_summary, ignore_links=True)

        # Check if we've already added source links
        if '🌉 bridged' in summary_text:
            return

        actor_id = actor.get('id')
        proto_phrase = (f' on {PROTOCOLS[obj.source_protocol].PHRASE}'
                        if obj.source_protocol else '')
        # prefer the actor's own URL, then the object's key id, then the actor
        # id. the parentheses matter: without them, the conditional expression
        # wraps the whole ``or``, so the actor's URL is dropped whenever
        # obj.key is unset.
        url = as1.get_url(actor) or (obj.key.id() if obj.key else actor_id)

        if cls.HTML_PROFILES:
            by = f' by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>'
            separator = '<br><br>'

            is_user = from_user.key and actor_id in (from_user.key.id(),
                                                     from_user.profile_id())
            if is_user:
                bridged = f'🌉 <a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a>'
                from_ = f'<a href="{from_user.web_url()}">{from_user.handle}</a>'
            else:
                bridged = '🌉 bridged'
                from_ = util.pretty_link(url) if url else '?'

        else:  # plain text
            # TODO: unify with above. which is right?
            obj_id = obj.key.id() if obj.key else obj.our_as1.get('id')
            is_user = from_user.key and obj_id in (from_user.key.id(),
                                                   from_user.profile_id())
            from_ = (from_user.web_url() if is_user else url) or '?'

            bridged = '🌉 bridged'
            by = (f': https://{PRIMARY_DOMAIN}{from_user.user_page_path()}'
                  # link web users to their user pages
                  if from_user.LABEL == 'web'
                  else f' by https://{PRIMARY_DOMAIN}/')
            separator = '\n\n'
            # plain text profiles get the text-only version of the bio
            orig_summary = summary_text

        # only add the separator if there's an existing bio to separate from
        source_links = f'{separator if orig_summary else ""}{bridged} from {from_}{proto_phrase}{by}'
        actor['summary'] = orig_summary + source_links
1✔
714

715
    @classmethod
1✔
716
    def set_username(to_cls, user, username):
        """Sets a custom username for a user's bridged account in this protocol.

        To be implemented by subclasses; this base implementation always
        raises ``NotImplementedError``.

        Args:
          user (models.User)
          username (str)

        Raises:
          ValueError: if the username is invalid
          RuntimeError: if the username could not be set
        """
        raise NotImplementedError()
1✔
728

729
    @classmethod
1✔
730
    def migrate_out(cls, user, to_user_id):
        """Migrates a bridged account out to be a native account.

        To be implemented by subclasses; this base implementation always
        raises ``NotImplementedError``.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: eg if this protocol doesn't own ``to_user_id``, or if
            ``user`` is on this protocol or not bridged to this protocol
        """
        raise NotImplementedError()
×
742

743
    @classmethod
1✔
744
    def check_can_migrate_out(cls, user, to_user_id):
        """Raises an exception if a user can't yet migrate to a native account.

        Checks, in order, that:

        * this protocol doesn't definitively rule out owning ``to_user_id``
        * ``user`` isn't on this protocol itself
        * ``user`` is currently bridged to this protocol

        The first failing check is logged as a warning and raised. If the user
        is ready to migrate, returns ``None``.

        Subclasses may override this to add more criteria, but they should call this
        implementation first.

        Args:
          user (models.User)
          to_user_id (str)

        Raises:
          ValueError: if ``user`` isn't ready to migrate to this protocol yet
        """
        # owns_id may return None for "unclear"; only a definite False blocks
        if cls.owns_id(to_user_id) is False:
            problem = f"{to_user_id} doesn't look like an {cls.LABEL} id"
        elif isinstance(user, cls):
            problem = f"{user.handle_or_id()} is on {cls.PHRASE}"
        elif not user.is_enabled(cls):
            problem = f"{user.handle_or_id()} isn't currently bridged to {cls.PHRASE}"
        else:
            return None

        logger.warning(problem)
        raise ValueError(problem)
1✔
772

773
    @classmethod
1✔
774
    def migrate_in(cls, user, from_user_id, **kwargs):
1✔
775
        """Migrates a native account in to be a bridged account.
776

777
        Args:
778
          user (models.User): native user on another protocol to attach the
779
            newly imported bridged account to
780
          from_user_id (str)
781
          kwargs: additional protocol-specific parameters
782

783
        Raises:
784
          ValueError: eg if this protocol doesn't own ``from_user_id``, or if
785
            ``user`` is on this protocol or already bridged to this protocol
786
        """
787
        raise NotImplementedError()
×
788

789
    @classmethod
1✔
790
    def target_for(cls, obj, shared=False):
1✔
791
        """Returns an :class:`Object`'s delivery target (endpoint).
792

793
        To be implemented by subclasses.
794

795
        Examples:
796

797
        * If obj has ``source_protocol`` ``web``, returns its URL, as a
798
          webmention target.
799
        * If obj is an ``activitypub`` actor, returns its inbox.
800
        * If obj is an ``activitypub`` object, returns it's author's or actor's
801
          inbox.
802

803
        Args:
804
          obj (models.Object):
805
          shared (bool): optional. If True, returns a common/shared
806
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
807
            multiple recipients for efficiency
808

809
        Returns:
810
          str: target endpoint, or None if not available.
811
        """
812
        raise NotImplementedError()
×
813

814
    @classmethod
    def is_blocklisted(cls, url, allow_internal=False):
        """Reports whether a URL is blocked, ie we shouldn't deliver to it.

        The effective blocklist starts from ``DOMAIN_BLOCKLIST``. Outside of
        ``DEBUG`` mode, ``util.RESERVED_TLDS`` and ``util.LOCAL_TLDS`` are added.
        Unless ``allow_internal`` is set, our own ``DOMAINS`` are added too. The
        URL is then matched with ``util.domain_or_parent_in``.

        This is the default implementation; subclasses may override it.

        Args:
          url (str):
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          the result of ``util.domain_or_parent_in(url, blocklist)``
        """
        blocked_domains = DOMAIN_BLOCKLIST

        if not DEBUG:
            blocked_domains += tuple(util.RESERVED_TLDS | util.LOCAL_TLDS)

        if not allow_internal:
            blocked_domains += DOMAINS

        return util.domain_or_parent_in(url, blocked_domains)
1✔
831

832
    @classmethod
    def translate_ids(to_cls, obj):
        """Translates all ids in an AS1 object to a specific protocol.

        Infers source protocol for each id value separately.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``bcc``
        * ``bto``
        * ``cc``
        * ``featured[].items``, ``featured[].orderedItems``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``object.object``
        * ``attachments[].id``
        * ``tags[objectType=mention].url``
        * ``to``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        The input is deep-copied first, so ``obj`` itself is not modified.

        Args:
          obj (dict): AS1 object or activity (not :class:`models.Object`!)

        Returns:
          dict: wrapped AS1 version of ``obj``. If ``obj`` is falsy, it is
          returned as is.
        """
        # must be called on a concrete protocol subclass, not the base class
        assert to_cls != Protocol
        if not obj:
            return obj

        # work on a copy, and normalize the inner object(s) to a list that's
        # shared between inner_objs and outer_obj['object']
        outer_obj = copy.deepcopy(obj)
        inner_objs = outer_obj['object'] = as1.get_objects(outer_obj)

        def translate(elem, field, fn, uri=False):
            """Translates the id(s) in ``elem[field]`` in place.

            Args:
              elem (dict): AS1 object that contains ``field``
              field (str): name of the field to translate
              fn (callable): called as ``fn(id=..., from_=..., to=...)`` to
                translate each id, eg ``translate_user_id``
              uri (bool): if True, each resulting id is additionally converted
                with ``to_cls(id=...).id_uri()``
            """
            elem[field] = as1.get_objects(elem, field)
            for obj in elem[field]:
                if id := obj.get('id'):
                    # leave audience values in addressing fields untouched
                    if field in ('to', 'cc', 'bcc', 'bto') and as1.is_audience(id):
                        continue
                    from_cls = Protocol.for_id(id)
                    # TODO: what if from_cls is None? relax translate_object_id,
                    # make it a noop if we don't know enough about from/to?
                    if from_cls and from_cls != to_cls:
                        obj['id'] = fn(id=id, from_=from_cls, to=to_cls)
                    if obj['id'] and uri:
                        obj['id'] = to_cls(id=obj['id']).id_uri()

            # collapse dicts that only have an id back down to bare id strings
            elem[field] = [o['id'] if o.keys() == {'id'} else o
                           for o in elem[field]]

            # unwrap single-element lists, except for collection item fields,
            # which always stay lists
            if len(elem[field]) == 1 and field not in ('items', 'orderedItems'):
                elem[field] = elem[field][0]

        # outer id: actors' ids are user ids, everything else is an object id
        type = as1.object_type(outer_obj)
        translate(outer_obj, 'id',
                  translate_user_id if type in as1.ACTOR_TYPES
                  else translate_object_id)

        # inner objects: treat the id as a user id if the inner object is an
        # actor, is the outer object's owner, or is the target of a follow or
        # stop-following
        for o in inner_objs:
            is_actor = (as1.object_type(o) in as1.ACTOR_TYPES
                        or as1.get_owner(outer_obj) == o.get('id')
                        or type in ('follow', 'stop-following'))
            translate(o, 'id', translate_user_id if is_actor else translate_object_id)
            obj_is_actor = o.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT
            translate(o, 'object', translate_user_id if obj_is_actor
                      else translate_object_id)

        # fields that are handled the same way on the outer and inner objects
        for o in [outer_obj] + inner_objs:
            translate(o, 'inReplyTo', translate_object_id)
            for field in 'actor', 'author', 'to', 'cc', 'bto', 'bcc':
                translate(o, field, translate_user_id)
            for tag in as1.get_objects(o, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', translate_user_id, uri=True)
            for att in as1.get_objects(o, 'attachments'):
                translate(att, 'id', translate_object_id)
                # attachments without an id get one derived from their url, if
                # we can determine the url's protocol
                url = att.get('url')
                if url and not att.get('id'):
                    if from_cls := Protocol.for_id(url):
                        att['id'] = translate_object_id(from_=from_cls, to=to_cls,
                                                        id=url)
            if feat := as1.get_object(o, 'featured'):
                translate(feat, 'orderedItems', translate_object_id)
                translate(feat, 'items', translate_object_id)

        outer_obj = util.trim_nulls(outer_obj)

        # compact the outer object field the same way translate() does: id-only
        # dicts become bare strings, single-element lists are unwrapped
        if objs := util.get_list(outer_obj ,'object'):
            outer_obj['object'] = [o['id'] if o.keys() == {'id'} else o for o in objs]
            if len(outer_obj['object']) == 1:
                outer_obj['object'] = outer_obj['object'][0]

        return outer_obj
1✔
941

942
    @classmethod
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
        """Handles an incoming activity.

        Roughly, in order: validates the id and visibility, leases the activity
        id in memcache, checks that the authenticated user matches the
        activity's owner, loads the sending user, wraps bare objects in a
        create or update, stores the activity and inner object, performs
        verb-specific handling (stop-following, delete/undo, block, DMs,
        follow), and finally delivers via :meth:`deliver`.

        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
        unset, the request is rejected via ``error('No id provided')``.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity.
            Required in practice: asserted to be a non-empty ``str``.
          internal (bool): whether to allow activity ids on internal domains,
            from opted out/blocked users, etc.
          received_at (datetime): when we first saw (received) this activity.
            Right now only used for monitoring.

        Returns:
          (str, int) tuple: (response body, HTTP status code) Flask response

        Raises:
          werkzeug.HTTPException: if the request is invalid
        """
        # check some invariants
        assert from_cls != Protocol
        assert isinstance(obj, Object), obj

        if not obj.as1:
            error('No object data provided')

        # prefer the Object's key id, fall back to the id inside the AS1
        id = None
        if obj.key and obj.key.id():
            id = obj.key.id()

        if not id:
            id = obj.as1.get('id')
            obj.key = ndb.Key(Object, id)

        if not id:
            error('No id provided')
        elif from_cls.owns_id(id) is False:
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
        elif from_cls.is_blocklisted(id, allow_internal=internal):
            error(f'Activity {id} is blocklisted')
        # check that this activity is public. only do this for some activities,
        # not eg likes or follows, since Mastodon doesn't currently mark those
        # as explicitly public.
        elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
                  and not as1.is_public(obj.as1, unlisted=False)
                  and not as1.is_dm(obj.as1)):
            logger.info('Dropping non-public activity')
            return ('OK', 200)

        # lease this object, atomically
        memcache_key = activity_id_memcache_key(id)
        leased = memcache.memcache.add(memcache_key, 'leased', noreply=False,
                                       expire=5 * 60)  # 5 min
        # short circuit if we've already seen this activity id.
        # (don't do this for bare objects since we need to check further down
        # whether they've been updated since we saw them last.)
        if (obj.as1.get('objectType') == 'activity'
            and 'force' not in request.values
            and (not leased
                 or (obj.new is False and obj.changed is False))):
            error(f'Already seen this activity {id}', status=204)

        # drop bulky fields from the logged AS1
        pruned = {k: v for k, v in obj.as1.items()
                  if k not in ('contentMap', 'replies', 'signature')}
        delay = ''
        if (received_at and request.headers.get('X-AppEngine-TaskRetryCount') == '0'
                and obj.type != 'delete'):  # we delay deletes for 2m
            delay_s = int((util.now().replace(tzinfo=None)
                           - received_at.replace(tzinfo=None)
                           ).total_seconds())
            delay = f'({delay_s} s behind)'
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')

        # does this protocol support this activity/object type?
        from_cls.check_supported(obj)

        # check authorization
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
        actor = as1.get_owner(obj.as1)
        if not actor:
            error('Activity missing actor or author')
        elif from_cls.owns_id(actor) is False:
            error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)

        assert authed_as
        assert isinstance(authed_as, str)
        authed_as = normalize_user_id(id=authed_as, proto=from_cls)
        actor = normalize_user_id(id=actor, proto=from_cls)
        if actor != authed_as:
            report_error("Auth: receive: authed_as doesn't match owner",
                         user=f'{id} authed_as {authed_as} owner {actor}')
            error(f"actor {actor} isn't authed user {authed_as}")

        # update copy ids to originals
        obj.normalize_ids()
        obj.resolve_ids()

        if (obj.type == 'follow'
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
            # follows of bot user; refresh user profile first
            logger.info(f'Follow of bot user, reloading {actor}')
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
            from_user.reload_profile()
        else:
            # load actor user
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=internal)

        if not internal and (not from_user or from_user.manual_opt_out):
            error(f"Couldn't load actor {actor}", status=204)

        # if this is an object, ie not an activity, wrap it in a create or update
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
        obj.add('users', from_user.key)

        inner_obj_as1 = as1.get_object(obj.as1)
        inner_obj_id = inner_obj_as1.get('id')
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
            if not inner_obj_id:
                error(f'{obj.type} object has no id!')

        # check age. we support backdated posts, but if they're over 2w old, we
        # don't deliver them
        if obj.type == 'post':
            if published := inner_obj_as1.get('published'):
                try:
                    published_dt = util.parse_iso8601(published)
                    if not published_dt.tzinfo:
                        published_dt = published_dt.replace(tzinfo=timezone.utc)
                    age = util.now() - published_dt
                    if age > CREATE_MAX_AGE:
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
                              status=204)
                except ValueError:  # from parse_iso8601
                    logger.debug(f"Couldn't parse published {published}")

        # write Object to datastore
        obj.source_protocol = from_cls.LABEL
        if obj.type in STORE_AS1_TYPES:
            obj.put()

        # store inner object
        # TODO: unify with big obj.type conditional below. would have to merge
        # this with the DM handling block lower down.
        crud_obj = None
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
            crud_obj = Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
                                            source_protocol=from_cls.LABEL,
                                            authed_as=actor, users=[from_user.key],
                                            deleted=False)

        # note: from here on, actor is the AS1 actor *object*, not the id string
        actor = as1.get_object(obj.as1, 'actor')
        actor_id = actor.get('id')

        # handle activity!
        if obj.type == 'stop-following':
            # TODO: unify with handle_follow?
            # TODO: handle multiple followees
            if not actor_id or not inner_obj_id:
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')

            # deactivate Follower
            from_ = from_cls.key_for(actor_id)
            to_cls = Protocol.for_id(inner_obj_id)
            to = to_cls.key_for(inner_obj_id)
            follower = Follower.query(Follower.to == to,
                                      Follower.from_ == from_,
                                      Follower.status == 'active').get()
            if follower:
                logger.info(f'Marking {follower} inactive')
                follower.status = 'inactive'
                follower.put()
            else:
                logger.warning(f'No Follower found for {from_} => {to}')

            # fall through to deliver to followee
            # TODO: do we convert stop-following to webmention 410 of original
            # follow?

            # fall through to deliver to followers

        elif obj.type in ('delete', 'undo'):
            # deleting the user itself means deleting its profile object
            delete_obj_id = (from_user.profile_id()
                            if inner_obj_id == from_user.key.id()
                            else inner_obj_id)

            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
            if not delete_obj:
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
                return 'OK', 204

            # TODO: just delete altogether!
            logger.info(f'Marking Object {delete_obj_id} deleted')
            delete_obj.deleted = True
            delete_obj.put()

            # if this is an actor, handle deleting it later so that
            # in case it's from_user, user.enabled_protocols is still populated
            #
            # fall through to deliver to followers and delete copy if necessary.
            # should happen via protocol-specific copy target and send of
            # delete activity.
            # https://github.com/snarfed/bridgy-fed/issues/63

        elif obj.type == 'block':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # blocking protocol bot user disables that protocol
                from_user.delete(proto)
                from_user.disable_protocol(proto)
                return 'OK', 200

        elif obj.type == 'post':
            # handle DMs to bot users
            if as1.is_dm(obj.as1):
                return dms.receive(from_user=from_user, obj=obj)

        # fetch actor if necessary
        if (actor and actor.keys() == set(['id'])
                and obj.type not in ('delete', 'undo')):
            logger.debug('Fetching actor so we have name, profile photo, etc')
            actor_obj = from_cls.load(ids.profile_id(id=actor['id'], proto=from_cls),
                                      raise_=False)
            if actor_obj and actor_obj.as1:
                obj.our_as1 = {
                    **obj.as1, 'actor': {
                        **actor_obj.as1,
                        # override profile id with actor id
                        # https://github.com/snarfed/bridgy-fed/issues/1720
                        'id': actor['id'],
                    }
                }

        # fetch object if necessary
        if (obj.type in ('post', 'update', 'share')
                and inner_obj_as1.keys() == set(['id'])
                and from_cls.owns_id(inner_obj_id)):
            logger.debug('Fetching inner object')
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
                                      remote=(obj.type in ('post', 'update')))
            if obj.type in ('post', 'update'):
                crud_obj = inner_obj
            if inner_obj and inner_obj.as1:
                obj.our_as1 = {
                    **obj.as1,
                    'object': {
                        **inner_obj_as1,
                        **inner_obj.as1,
                    }
                }
            elif obj.type in ('post', 'update'):
                # this message needs the f prefix to interpolate the id
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")

        if obj.type == 'follow':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # follow of one of our protocol bot users; enable that protocol.
                # fall through so that we send an accept.
                try:
                    from_user.enable_protocol(proto)
                except ErrorButDoNotRetryTask:
                    # couldn't enable; tell the follower so, then propagate
                    from web import Web
                    bot = Web.get_by_id(proto.bot_user_id())
                    from_cls.respond_to_follow('reject', follower=from_user,
                                               followee=bot, follow=obj)
                    raise
                proto.bot_follow(from_user)

            from_cls.handle_follow(obj)

        # deliver to targets
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)

        # if this is a user, deactivate its followers/followings
        # https://github.com/snarfed/bridgy-fed/issues/1304
        if obj.type == 'delete':
            if user_key := from_cls.key_for(id=inner_obj_id):
                if user := user_key.get():
                    for proto in user.enabled_protocols:
                        user.disable_protocol(PROTOCOLS[proto])

                    logger.info(f'Deactivating Followers from or to {user_key.id()}')
                    followers = Follower.query(
                        OR(Follower.to == user_key, Follower.from_ == user_key)
                        ).fetch()
                    for f in followers:
                        f.status = 'inactive'
                    ndb.put_multi(followers)

        # replace the short lease with a longer-lived "done" marker
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
        return resp
1✔
1232

1233
    @classmethod
    def handle_follow(from_cls, obj):
        """Handles an incoming follow activity.

        Loads the follower, then for each followee on a *different* protocol,
        stores a :class:`Follower` and sends an ``Accept`` back via
        :meth:`respond_to_follow`. Doesn't send the ``Follow`` itself; that
        happens in :meth:`deliver`.

        Side effects on ``obj``: ``obj.users`` is set to the follower's key, and
        each followee's key is added to ``obj.notify``.

        Args:
          obj (models.Object): follow activity
        """
        logger.debug('Got follow. Loading users, storing Follow(s), sending accept(s)')

        # Prepare follower (from) users' data
        # TODO: remove all of this and just use from_user
        from_as1 = as1.get_object(obj.as1, 'actor')
        from_id = from_as1.get('id')
        if not from_id:
            error(f'Follow activity requires actor. Got: {obj.as1}')

        from_obj = from_cls.load(from_id, raise_=False)
        if not from_obj:
            error(f"Couldn't load {from_id}", status=502)

        # no stored profile data; fall back to the actor embedded in the follow
        if not from_obj.as1:
            from_obj.our_as1 = from_as1
            from_obj.put()

        from_key = from_cls.key_for(from_id)
        if not from_key:
            error(f'Invalid {from_cls.LABEL} user key: {from_id}')
        obj.users = [from_key]
        from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls:
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_obj = to_cls.load(to_id)
            # same fallback as for the follower: use the embedded followee data
            if to_obj and not to_obj.as1:
                to_obj.our_as1 = to_as1
                to_obj.put()

            to_key = to_cls.key_for(to_id)
            if not to_key:
                # this is the followee's key, so report its protocol and id
                logger.info(f'Skipping invalid {to_cls.LABEL} user key: {to_id}')
                continue

            to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj,
                                           allow_opt_out=True)
            # called for its side effect; the returned Follower isn't needed
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')
            obj.add('notify', to_key)
            from_cls.respond_to_follow('accept', follower=from_user,
                                       followee=to_user, follow=obj)
1303

1304
    @classmethod
    def respond_to_follow(_, verb, follower, followee, follow):
        """Queues an accept or reject activity in response to a follow.

        Does nothing if ``verb`` isn't in the follower's
        ``SUPPORTED_AS1_TYPES``, ie its protocol doesn't support
        accepts/rejects. Otherwise, a ``send`` task is created that delivers
        the response to the follower's target, with the followee as the actor.

        Args:
          verb (str): ``accept`` or  ``reject``
          follower (models.User)
          followee (models.User)
          follow (models.Object)
        """
        assert verb in ('accept', 'reject')
        if verb not in follower.SUPPORTED_AS1_TYPES:
            return

        target = follower.target_for(follower.obj)
        if not target:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # send. note that this is one response for the whole follow, even if it
        # has multiple followees!
        followee_id = followee.key.id()
        response_id = f'{followee_id}/followers#{verb}-{follow.key.id()}'
        response_as1 = {
            'id': response_id,
            'objectType': 'activity',
            'verb': verb,
            'actor': followee_id,
            'object': follow.as1,
        }
        common.create_task(
            queue='send',
            id=response_id,
            our_as1=response_as1,
            url=target,
            protocol=follower.LABEL,
            user=followee.key.urlsafe(),
        )
1337

1338
    @classmethod
    def bot_follow(bot_cls, user):
        """Queues a follow of a user from this protocol's bot user.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub).

        The bot user is the :class:`web.Web` user with id
        ``bot_cls.bot_user_id()``. If ``user`` has no profile object, this logs
        that and returns without queueing anything.

        Args:
          user (User)
        """
        from web import Web

        bot_user = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()
        user_id = user.key.id()
        bot_id = bot_user.key.id()
        logger.info(f'Following {user_id} back from bot user {bot_id}')

        if not user.obj:
            logger.info("  can't follow, user has no profile obj")
            return

        target = user.target_for(user.obj)
        # the timestamp in the id makes each follow-back activity id distinct
        follow_id = f'https://{bot_id}/#follow-back-{user_id}-{timestamp}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_id,
            'actor': bot_id,
            'object': user_id,
        }
        common.create_task(
            queue='send',
            id=follow_id,
            our_as1=follow_as1,
            url=target,
            source_protocol='web',
            protocol=user.LABEL,
            user=bot_user.key.urlsafe(),
        )
1370

1371
    @classmethod
    def handle_bare_object(cls, obj, authed_as=None):
        """Wraps a bare object in a create or update activity, if necessary.

        Only actors, notes, articles, and comments are wrapped; any other type
        is returned unchanged. Otherwise:

        * changed objects and actor profiles are wrapped in an ``update``
        * new objects, and requests with the ``force`` form param, are wrapped
          in a ``post``
        * anything else is reported as unchanged via ``error`` with status 204

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        owner_id = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        timestamp = util.now().isoformat()

        # occasionally we override the object, eg if this is a profile object
        # coming in via a user with use_instead set
        inner_as1 = obj.as1
        key_id = obj.key.id()
        as1_id = inner_as1.get('id') if key_id else None
        if key_id and as1_id and key_id != as1_id:
            logger.info(f'Overriding AS1 object id {as1_id} with Object id {key_id}')
            inner_as1['id'] = key_id

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            logger.info(
                f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes'
                if obj.changed
                else 'Got actor profile object, wrapping in update')

            update_id = f'{key_id}#bridgy-fed-update-{timestamp}'
            # Mastodon requires the updated field for Updates, so add a default
            # value, which the object's own updated field overrides if set.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': owner_id,
                'object': {'updated': timestamp, **inner_as1},
            }
            logger.debug(f'  AS1: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        # HACK: force query param here is specific to webmention
        if obj.new or 'force' in request.form:
            post_id = f'{key_id}#bridgy-fed-create'
            post_as1 = {
                'objectType': 'activity',
                'verb': 'post',
                'id': post_id,
                'actor': owner_id,
                'object': inner_as1,
                'published': timestamp,
            }
            logger.info('Wrapping in post')
            logger.debug(f'  AS1: {json_dumps(post_as1, indent=2)}')
            return Object(id=post_id, our_as1=post_as1,
                          source_protocol=obj.source_protocol)

        error(f'{key_id} is unchanged, nothing to do', status=204)
1✔
1444

1445
    @classmethod
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Fans an activity out to its recipients by enqueueing ``send`` tasks.

        Resolves the delivery targets with :meth:`targets`, stores whichever
        object :meth:`targets` marked dirty, and then creates one ``send``
        task per target, ordered by target URI.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response; 204 if there are no targets,
          otherwise 202
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # targets() maps Target to Object or None. as a side effect it may
        # update the notify and feed properties (and the dirty attribute) of
        # crud_obj or obj, so the write below has to happen after this call.
        recipients = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if to_proto:
            wanted_label = to_proto.LABEL
            recipients = {tgt: orig for tgt, orig in recipients.items()
                          if tgt.protocol == wanted_label}
        if not recipients:
            return r'No targets, nothing to do ¯\_(ツ)_/¯', 204

        # persist the object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif obj.type in STORE_AS1_TYPES and obj.dirty:
            obj.put()

        # stored types are passed to the send task by id; everything else is
        # serialized into the task's parameters
        if obj.type in STORE_AS1_TYPES:
            obj_params = {'obj_id': obj.key.id()}
        else:
            obj_params = obj.to_request()

        # deterministic order for tests, debugging, etc
        ordered = sorted(recipients.items(), key=lambda pair: pair[0].uri)
        logger.info(f'Delivering to: {[tgt for tgt, _ in ordered]}')

        # enqueue one send task per target
        user = from_user.key.urlsafe()
        for tgt, orig in ordered:
            common.create_task(
                queue='send', url=tgt.uri, protocol=tgt.protocol,
                orig_obj_id=orig.key.id() if orig else None,
                user=user, **obj_params)

        return 'OK', 202
1✔
1496

1497
    @classmethod
1✔
1498
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1499
        """Collects the targets to send a :class:`models.Object` to.
1500

1501
        Targets are both objects - original posts, events, etc - and actors.
1502

1503
        Args:
1504
          obj (models.Object)
1505
          from_user (User)
1506
          crud_obj (models.Object): if this is a create, update, or delete/undo
1507
            activity, the inner object that's being written, otherwise None.
1508
            (This object's ``notify`` and ``feed`` properties may be updated.)
1509
          internal (bool): whether this is a recursive internal call
1510

1511
        Returns:
1512
          dict: maps :class:`models.Target` to original (in response to)
1513
          :class:`models.Object`, if any, otherwise None
1514
        """
1515
        logger.debug('Finding recipients and their targets')
1✔
1516

1517
        # we should only have crud_obj iff this is a create or update
1518
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1519
        write_obj = crud_obj or obj
1✔
1520
        write_obj.dirty = False
1✔
1521

1522
        target_uris = sorted(set(as1.targets(obj.as1)))
1✔
1523
        logger.info(f'Raw targets: {target_uris}')
1✔
1524
        orig_obj = None
1✔
1525
        targets = {}  # maps Target to Object or None
1✔
1526
        owner = as1.get_owner(obj.as1)
1✔
1527
        allow_opt_out = (obj.type == 'delete')
1✔
1528
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1529
        inner_obj_id = inner_obj_as1.get('id')
1✔
1530
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1531
        quoted_posts = as1.quoted_posts(inner_obj_as1)
1✔
1532
        mentioned_urls = as1.mentions(inner_obj_as1)
1✔
1533
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1534
        is_self_reply = False
1✔
1535

1536
        original_ids = []
1✔
1537
        if is_reply:
1✔
1538
            original_ids = in_reply_tos
1✔
1539
        elif inner_obj_id:
1✔
1540
            if inner_obj_id == from_user.key.id():
1✔
1541
                inner_obj_id = from_user.profile_id()
1✔
1542
            original_ids = [inner_obj_id]
1✔
1543

1544
        # which protocols should we allow delivering to?
1545
        to_protocols = []
1✔
1546
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1547
                      + from_user.enabled_protocols):
1548
            if not (proto := PROTOCOLS.get(label)):
1✔
1549
                report_error(f'unknown enabled protocol {label} for {from_user.key.id()}')
1✔
1550
                continue
1✔
1551

1552
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1553
                                     or is_reply):
1554
                for id in original_ids:
1✔
1555
                    if Protocol.for_id(id) == proto:
1✔
1556
                        logger.info(f'Allowing {label} for original post {id}')
1✔
1557
                        break
1✔
1558
                    elif orig := from_user.load(id, remote=False):
1✔
1559
                        if orig.get_copy(proto):
1✔
1560
                            logger.info(f'Allowing {label}, original post {id} was bridged there')
1✔
1561
                            break
1✔
1562
                else:
1563
                    logger.info(f"Skipping {label}, original objects {original_ids} weren't bridged there")
1✔
1564
                    continue
1✔
1565

1566
            util.add(to_protocols, proto)
1✔
1567

1568
        # process direct targets
1569
        for id in sorted(target_uris):
1✔
1570
            target_proto = Protocol.for_id(id)
1✔
1571
            if not target_proto:
1✔
1572
                logger.info(f"Can't determine protocol for {id}")
1✔
1573
                continue
1✔
1574
            elif target_proto.is_blocklisted(id):
1✔
1575
                logger.debug(f'{id} is blocklisted')
1✔
1576
                continue
1✔
1577

1578
            orig_obj = target_proto.load(id, raise_=False)
1✔
1579
            if not orig_obj or not orig_obj.as1:
1✔
1580
                logger.info(f"Couldn't load {id}")
1✔
1581
                continue
1✔
1582

1583
            target_author_key = (target_proto(id=id).key if id in mentioned_urls
1✔
1584
                                 else target_proto.actor_key(orig_obj))
1585
            if not from_user.is_enabled(target_proto):
1✔
1586
                # if author isn't bridged and target user is, DM a prompt and
1587
                # add a notif for the target user
1588
                if (id in (in_reply_tos + quoted_posts + mentioned_urls)
1✔
1589
                        and target_author_key):
1590
                    if target_author := target_author_key.get():
1✔
1591
                        if target_author.is_enabled(from_cls):
1✔
1592
                            notifications.add_notification(target_author, write_obj)
1✔
1593
                            verb, noun = (
1✔
1594
                                ('replied to', 'replies') if id in in_reply_tos
1595
                                else ('quoted', 'quotes') if id in quoted_posts
1596
                                else ('mentioned', 'mentions'))
1597
                            dms.maybe_send(
1✔
1598
                                from_proto=target_proto, to_user=from_user,
1599
                                type='replied_to_bridged_user', text=f"""\
1600
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently {verb}</a> {target_author.user_link()}, who's bridged here from {target_proto.PHRASE}. If you want them to see your {noun}, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1601

1602
                continue
1✔
1603

1604
            # deliver self-replies to followers
1605
            # https://github.com/snarfed/bridgy-fed/issues/639
1606
            if id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1607
                is_self_reply = True
1✔
1608
                logger.info(f'self reply!')
1✔
1609

1610
            # also add copies' targets
1611
            for copy in orig_obj.copies:
1✔
1612
                proto = PROTOCOLS[copy.protocol]
1✔
1613
                if proto in to_protocols:
1✔
1614
                    # copies generally won't have their own Objects
1615
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1616
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {id}')
1✔
1617
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1618

1619
            if target_proto == from_cls:
1✔
1620
                logger.debug(f'Skipping same-protocol target {id}')
1✔
1621
                continue
1✔
1622

1623
            target = target_proto.target_for(orig_obj)
1✔
1624
            if not target:
1✔
1625
                # TODO: surface errors like this somehow?
1626
                logger.error(f"Can't find delivery target for {id}")
×
UNCOV
1627
                continue
×
1628

1629
            logger.debug(f'Target for {id} is {target}')
1✔
1630
            # only use orig_obj for inReplyTos, like/repost objects, etc
1631
            # https://github.com/snarfed/bridgy-fed/issues/1237
1632
            targets[Target(protocol=target_proto.LABEL, uri=target)] = (
1✔
1633
                orig_obj if id in in_reply_tos or id in as1.get_ids(obj.as1, 'object')
1634
                else None)
1635

1636
            if target_author_key:
1✔
1637
                logger.debug(f'Recipient is {target_author_key}')
1✔
1638
                if write_obj.add('notify', target_author_key):
1✔
1639
                    write_obj.dirty = True
1✔
1640

1641
        if obj.type == 'undo':
1✔
1642
            logger.debug('Object is an undo; adding targets for inner object')
1✔
1643
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
1644
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
1645
            else:
1646
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
1647
            if inner_obj:
1✔
1648
                targets.update(from_cls.targets(inner_obj, from_user=from_user,
1✔
1649
                                                internal=True))
1650

1651
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
1652

1653
        # deliver to followers, if appropriate
1654
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
1655
        if not user_key:
1✔
1656
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
1657
            return targets
1✔
1658

1659
        followers = []
1✔
1660
        if (obj.type in ('post', 'update', 'delete', 'move', 'share', 'undo')
1✔
1661
                and (not is_reply or is_self_reply)):
1662
            logger.info(f'Delivering to followers of {user_key}')
1✔
1663
            followers = []
1✔
1664
            for f in Follower.query(Follower.to == user_key,
1✔
1665
                                    Follower.status == 'active'):
1666
                proto = PROTOCOLS_BY_KIND[f.from_.kind()]
1✔
1667
                # skip protocol bot users
1668
                if (not Protocol.for_bridgy_subdomain(f.from_.id())
1✔
1669
                        # skip protocols this user hasn't enabled, or where the base
1670
                        # object of this activity hasn't been bridged
1671
                        and proto in to_protocols
1672
                        # we deliver to HAS_COPIES protocols separately, below. we
1673
                        # assume they have follower-independent targets.
1674
                        and not (proto.HAS_COPIES and proto.DEFAULT_TARGET)):
1675
                    followers.append(f)
1✔
1676

1677
            user_keys = [f.from_ for f in followers]
1✔
1678
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
1679
            User.load_multi(users)
1✔
1680

1681
            if (not followers and
1✔
1682
                (util.domain_or_parent_in(from_user.key.id(), LIMITED_DOMAINS)
1683
                 or util.domain_or_parent_in(obj.key.id(), LIMITED_DOMAINS))):
1684
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
1685
                return {}
1✔
1686

1687
            # add to followers' feeds, if any
1688
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
1689
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
1690
                    write_obj.feed = [u.key for u in users if u.USES_OBJECT_FEED]
1✔
1691
                    if write_obj.feed:
1✔
1692
                        write_obj.dirty = True
1✔
1693

1694
            # collect targets for followers
1695
            for user in users:
1✔
1696
                # TODO: should we pass remote=False through here to Protocol.load?
1697
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
1698
                if not target:
1✔
1699
                    # TODO: surface errors like this somehow?
1700
                    logger.error(f'Follower {user.key} has no delivery target')
1✔
1701
                    continue
1✔
1702

1703
                # normalize URL (lower case hostname, etc)
1704
                # ...but preserve our PDS URL without trailing slash in path
1705
                # https://atproto.com/specs/did#did-documents
1706
                target = util.dedupe_urls([target], trailing_slash=False)[0]
1✔
1707

1708
                targets[Target(protocol=user.LABEL, uri=target)] = \
1✔
1709
                    Object.get_by_id(inner_obj_id) if obj.type == 'share' else None
1710

1711
        # deliver to enabled HAS_COPIES protocols proactively
1712
        if obj.type in ('post', 'update', 'delete', 'share'):
1✔
1713
            for proto in to_protocols:
1✔
1714
                if proto.HAS_COPIES and proto.DEFAULT_TARGET:
1✔
1715
                    logger.info(f'user has {proto.LABEL} enabled, adding {proto.DEFAULT_TARGET}')
1✔
1716
                    targets.setdefault(
1✔
1717
                        Target(protocol=proto.LABEL, uri=proto.DEFAULT_TARGET), None)
1718

1719
        # de-dupe targets, discard same-domain
1720
        # maps string target URL to (Target, Object) tuple
1721
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
1722
        # maps Target to Object or None
1723
        targets = {}
1✔
1724
        source_domains = [
1✔
1725
            util.domain_from_link(url) for url in
1726
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
1727
            if util.is_web(url)
1728
        ]
1729
        for url in sorted(util.dedupe_urls(
1✔
1730
                candidates.keys(),
1731
                # preserve our PDS URL without trailing slash in path
1732
                # https://atproto.com/specs/did#did-documents
1733
                trailing_slash=False)):
1734
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
1735
                logger.info(f'Skipping same-domain target {url}')
×
UNCOV
1736
                continue
×
1737
            target, obj = candidates[url]
1✔
1738
            targets[target] = obj
1✔
1739

1740
        return targets
1✔
1741

1742
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, **kwargs):
        """Returns an Object, loaded from the datastore and/or fetched remotely.

        Sets the :attr:`new` and :attr:`changed` attributes if we know either
        one for the loaded object, ie local is True and remote is True or None.

        A stored object is returned as is when ``remote`` is None and it was
        updated within ``OBJECT_REFRESH_AGE``; older stored objects are
        re-fetched. Fetched objects are written back to the datastore.

        Args:
          id (str)
          remote (bool): whether to fetch the object over the network. If True,
            fetches even if we already have the object stored, and updates our
            stored copy. If False and we don't have the object stored, returns
            None. Default (None) means to fetch over the network only if we
            don't already have it stored.
          local (bool): whether to load from the datastore before
            fetching over the network. If False, still stores back to the
            datastore after a successful remote fetch.
          raise_ (bool): if False, catches any
            :class:`requests.RequestException` or :class:`HTTPException`
            raised by :meth:`fetch()` and returns ``None`` instead
          kwargs: passed through to :meth:`fetch()`

        Returns:
          models.Object: loaded object, or None if it isn't fetchable, eg a
          non-URL string for Web, or ``remote`` is False and it isn't in the
          datastore, or the fetched object is over ``models.MAX_ENTITY_SIZE``

        Raises:
          requests.HTTPError: anything that :meth:`fetch` raises, if ``raise_``
            is True
        """
        assert id
        assert local or remote is not False

        entity = Object.get_by_id(id) if local else None
        if entity and (entity.as1 or entity.raw or entity.deleted):
            # we have real stored data for it, so it's not new
            entity.new = False

        if remote is False:
            return entity

        if remote is None and entity:
            refresh_cutoff = util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not (entity.updated < refresh_cutoff):
                # stored copy is fresh enough, no need to fetch
                return entity

        previous_as1 = None
        if entity:
            # remember the old contents so we can detect changes after fetching
            previous_as1 = entity.as1
            entity.our_as1 = None
            entity.new = False
        else:
            entity = Object(id=id)
            if local:
                # we checked the datastore and it wasn't there
                entity.new = True
                entity.changed = False

        try:
            fetched = cls.fetch(entity, **kwargs)
        except (RequestException, HTTPException) as err:
            if raise_:
                raise
            util.interpret_http_exception(err)
            return None

        if not fetched:
            return None

        # https://stackoverflow.com/a/3042250/186123
        serialized = _entity_to_protobuf(entity)._pb.SerializeToString()
        size = len(serialized)
        if size > models.MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
            return None

        entity.resolve_ids()
        entity.normalize_ids()

        if entity.new is False:
            entity.changed = entity.activity_changed(previous_as1)

        if entity.source_protocol not in (cls.LABEL, cls.ABBREV):
            # only warn when we're overwriting a different existing protocol,
            # not when it was just unset
            if entity.source_protocol:
                logger.warning(f'Object {entity.key.id()} changed protocol from {entity.source_protocol} to {cls.LABEL} ?!')
            entity.source_protocol = cls.LABEL

        entity.put()
        return entity
1✔
1837

1838
    @classmethod
    def check_supported(cls, obj):
        """Rejects objects that this protocol can't handle.

        Calls ``error`` with HTTP status 204 in three cases: the object's type
        (or, for CRUD activities, its inner object's type) isn't in
        ``SUPPORTED_AS1_TYPES``; it's a post with blank content and no image
        or attachment stream; or it's a DM that this protocol doesn't support
        or that isn't to or from a protocol bot account. Objects without a
        type are let through unchecked.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)

        Raises:
          werkzeug.HTTPException: if this protocol doesn't support this object
        """
        if not obj.type:
            return

        inner_type = as1.object_type(as1.get_object(obj.as1)) or ''

        type_ok = obj.type in cls.SUPPORTED_AS1_TYPES
        if type_ok and obj.type in as1.CRUD_VERBS and inner_type:
            # CRUD activities also need a supported inner object
            type_ok = inner_type in cls.SUPPORTED_AS1_TYPES
        if not type_ok:
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # don't allow posts with blank content and no image/video/audio
        if obj.type in ('post', 'update'):
            crud_obj = as1.get_object(obj.as1)
        else:
            crud_obj = obj.as1

        if crud_obj.get('objectType') in as1.POST_TYPES:
            has_media = (util.get_url(crud_obj, key='image')
                         or any(util.get_urls(crud_obj, 'attachments',
                                              inner_key='stream')))
            # TODO: handle articles with displayName but not content
            if (not has_media
                    and not source.html_to_text(crud_obj.get('content')).strip()):
                error('Blank content and no image or video or audio', status=204)

        # DMs are only allowed to/from protocol bot accounts
        if recip := as1.recipient_if_dm(obj.as1):
            protocol_user_ids = PROTOCOL_DOMAINS + common.protocol_user_copy_ids()
            dm_allowed = (cls.SUPPORTS_DMS
                          and (recip in protocol_user_ids
                               or as1.get_owner(obj.as1) in protocol_user_ids))
            if not dm_allowed:
                error("Bridgy Fed doesn't support DMs", status=204)
1✔
1881

1882

1883
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Builds the object from the request and hands it to the
    :meth:`Protocol.receive` of its source protocol.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`. If it's the primary
        domain or one of the protocol domains, the activity is treated as
        internal.
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    params = request.form.to_dict()

    authed_as = params.pop('authed_as', None)
    internal = (authed_as in common.PROTOCOL_DOMAINS
                or authed_as == common.PRIMARY_DOMAIN)

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    received_at = params.pop('received_at', None)
    if received_at:
        received_at = datetime.fromisoformat(received_at)

    try:
        source_proto = PROTOCOLS[obj.source_protocol]
        return source_proto.receive(obj=obj, authed_as=authed_as,
                                    internal=internal, received_at=received_at)
    except RequestException as err:
        util.interpret_http_exception(err)
        error(err, status=304)
    except ValueError as err:
        logger.warning(err, exc_info=True)
        error(err, status=304)
×
1927

1928

1929
@cloud_tasks_only(log=None)
def send_task():
    """Task handler that sends one activity to one specific destination.

    Calls :meth:`Protocol.send` with the form parameters.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns:
      (str, int) tuple: empty body with status 200 if the send reported
      success, 204 if ``protocol`` or ``url`` is missing or the send returned
      False, otherwise 304
    """
    common.log_request()

    # prepare
    params = request.form.to_dict()
    url = params.get('url')
    protocol = params.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    # constructed but not used afterward. NOTE(review): presumably kept for
    # the model's validation of protocol - confirm before removing
    Target(uri=url, protocol=protocol)

    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    to_proto = PROTOCOLS[protocol]
    to_proto.check_supported(obj)
    allow_opt_out = (obj.type == 'delete')

    user = None
    urlsafe_key = params.get('user')
    if urlsafe_key:
        key = ndb.Key(urlsafe=urlsafe_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        user = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send. only report how far behind we are on the first attempt.
    delay = ''
    first_attempt = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if first_attempt and obj.created:
        elapsed = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(elapsed.total_seconds())
        delay = f'({delay_s} s behind)'
    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = to_proto.send(obj, url, from_user=user,
                             orig_obj_id=params.get('orig_obj_id'))
    except BaseException as err:
        # swallow exceptions that interpret_http_exception recognizes; sent
        # stays None, so we return 304 below. re-raise everything else.
        code, body = util.interpret_http_exception(err)
        if not code and not body:
            raise

    if sent:
        status = 200
    elif sent is False:
        logger.info('Failed sending!')
        status = 204
    else:
        status = 304

    return '', status
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc