• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / fb2666e9-c1f7-49f9-81b9-777c03054750

02 Jun 2024 04:44AM UTC coverage: 94.127% (-0.01%) from 94.141%
fb2666e9-c1f7-49f9-81b9-777c03054750

push

circleci

snarfed
ndb global cache: limit to users and profile objects, not other objects/activities

5 of 5 new or added lines in 3 files covered. (100.0%)

26 existing lines in 5 files now uncovered.

3750 of 3984 relevant lines covered (94.13%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.58
/protocol.py
1
"""Base protocol class and common code."""
2
import copy
1✔
3
from datetime import timedelta
1✔
4
import logging
1✔
5
import re
1✔
6
from threading import Lock
1✔
7
from urllib.parse import urljoin, urlparse
1✔
8

9
from cachetools import cached, LRUCache
1✔
10
from flask import request
1✔
11
from google.cloud import ndb
1✔
12
from google.cloud.ndb import OR
1✔
13
from google.cloud.ndb.model import _entity_to_protobuf
1✔
14
from granary import as1, as2
1✔
15
from granary.source import html_to_text
1✔
16
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
17
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
18
from oauth_dropins.webutil import models
1✔
19
from oauth_dropins.webutil import util
1✔
20
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
21
import werkzeug.exceptions
1✔
22

23
import common
1✔
24
from common import (
1✔
25
    add,
26
    DOMAIN_BLOCKLIST,
27
    DOMAIN_RE,
28
    DOMAINS,
29
    error,
30
    PRIMARY_DOMAIN,
31
    PROTOCOL_DOMAINS,
32
    subdomain_wrap,
33
)
34
from ids import normalize_user_id, translate_object_id, translate_user_id
1✔
35
from models import Follower, get_originals, Object, PROTOCOLS, Target, User
1✔
36

37
# AS1 verbs and object types that Protocol.receive accepts. It rejects any
# other type with an HTTP 501.
SUPPORTED_TYPES = (
    'accept',
    'article',
    'audio',
    'block',
    'comment',
    'delete',
    'follow',
    'image',
    'like',
    'note',
    'post',
    'share',
    'stop-following',
    'undo',
    'update',
    'video',
)

# NOTE(review): presumably the age after which a stored object is considered
# stale and refreshed; the code that reads this isn't in this part of the file.
OBJECT_REFRESH_AGE = timedelta(days=30)

# require a follow for users on these domains before we deliver anything from
# them other than their profile
LIMITED_DOMAINS = util.load_file_lines('limited_domains')

# activity ids that we've already handled and can now ignore.
# used in Protocol.receive, which holds seen_ids_lock while reading and
# writing this cache
seen_ids = LRUCache(100000)
seen_ids_lock = Lock()

# objects that have been loaded in Protocol.load
objects_cache = LRUCache(5000)
objects_cache_lock = Lock()

logger = logging.getLogger(__name__)
72

73

74
class Protocol:
1✔
75
    """Base protocol class. Not to be instantiated; classmethods only.
76

77
    Attributes:
78
      LABEL (str): human-readable lower case name
79
      OTHER_LABELS (list of str): label aliases
80
      ABBREV (str): lower case abbreviation, used in URL paths
81
      PHRASE (str): human-readable name or phrase. Used in phrases like
82
        ``Follow this person on {PHRASE}``
83
      LOGO_HTML (str): logo emoji or ``<img>`` tag
84
      CONTENT_TYPE (str): MIME type of this protocol's native data format,
85
        appropriate for the ``Content-Type`` HTTP header.
86
      HAS_FOLLOW_ACCEPTS (bool): whether this protocol supports explicit
87
        accept/reject activities in response to follows, eg ActivityPub
88
      HAS_COPIES (bool): whether this protocol is push and needs us to
89
        proactively create "copy" users and objects, as opposed to pulling
90
        converted objects on demand
91
      REQUIRES_AVATAR (bool): whether accounts on this protocol are required
92
        to have a profile picture. If they don't, their ``User.status`` will be
93
        ``blocked``.
94
      REQUIRES_NAME (bool): whether accounts on this protocol are required to
95
        have a profile name that's different than their handle or id. If they
96
        don't, their ``User.status`` will be ``blocked``.
97
      REQUIRES_OLD_ACCOUNT (bool): whether accounts on this protocol are
98
        required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their
99
        profile includes creation date and it's not old enough, their
100
        ``User.status`` will be ``blocked``.
101
      DEFAULT_ENABLED_PROTOCOLS (list of str): labels of other protocols that
102
        are automatically enabled for this protocol to bridge into
103
    """
104
    # Defaults for the per-protocol settings described in the class docstring.
    # lower case abbreviation, used in URL paths and in bot_user_id
    ABBREV = None
    # human-readable name or phrase
    PHRASE = None
    # label aliases
    OTHER_LABELS = ()
    # logo emoji or <img> tag
    LOGO_HTML = ''
    # MIME type of the protocol's native data format
    CONTENT_TYPE = None
    # whether follows get explicit accept/reject activities
    HAS_FOLLOW_ACCEPTS = False
    # whether "copy" users and objects need to be created proactively
    HAS_COPIES = False
    # whether accounts must have a profile picture
    REQUIRES_AVATAR = False
    # whether accounts must have a profile name that differs from handle/id
    REQUIRES_NAME = False
    # whether accounts must be at least common.OLD_ACCOUNT_AGE old
    REQUIRES_OLD_ACCOUNT = False
    # labels of other protocols that are enabled automatically
    DEFAULT_ENABLED_PROTOCOLS = ()
115

116
    def __init__(self):
        """Always fails; this class is only used through its classmethods.

        Raises:
          AssertionError: unconditionally
        """
        # raise explicitly rather than ``assert False``: assert statements are
        # removed when Python runs with -O, which would silently disable this
        # guard
        raise AssertionError('Protocol is not meant to be instantiated')
118

119
    @classmethod
    @property
    def LABEL(cls):
        """Human-readable label: the class's own name, lower cased.

        Returns:
          str:
        """
        # NOTE(review): stacking @classmethod on @property is deprecated as of
        # Python 3.11 and no longer supported in 3.13; revisit when upgrading.
        name = cls.__name__
        return name.lower()
123

124
    @staticmethod
    def for_request(fed=None):
        """Looks up the protocol that the current request is addressed to.

        The decision is based on the Flask request's host, and is delegated to
        :meth:`for_bridgy_subdomain`.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request's host is not a subdomain
          of ``brid.gy`` or isn't a known protocol
        """
        host = request.host
        return Protocol.for_bridgy_subdomain(host, fed=fed)
139

140
    @staticmethod
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Maps a brid.gy (sub)domain, or a URL on one, to its protocol.

        Args:
          domain_or_url (str): bare domain, or web URL whose domain is used
          fed (str or protocol.Protocol): protocol, or protocol label, to
            return if the domain is the primary domain or a local domain

        Returns:
          class: :class:`Protocol` subclass, or None if the domain is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        # web URLs are reduced to their domain; anything else is used as is
        if util.is_web(domain_or_url):
            host = util.domain_from_link(domain_or_url, minimize=False)
        else:
            host = domain_or_url

        # primary and local domains: the caller decides via fed
        if host == common.PRIMARY_DOMAIN or host in common.LOCAL_DOMAINS:
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        # per-protocol subdomain: the part before the superdomain is the label
        if host and host.endswith(common.SUPERDOMAIN):
            return PROTOCOLS.get(host.removesuffix(common.SUPERDOMAIN))

        return None
163

164
    @classmethod
    def owns_id(cls, id):
        """Guesses whether this protocol owns an id.

        To be implemented by subclasses. This base implementation always
        returns False.

        IDs are strings that uniquely identify users and are meant primarily
        for machines. Handles, by contrast, are human-chosen and
        human-meaningful, and often but not always unique.

        For some protocols the id format is more or less conclusive, eg AT
        Protocol owns ``at://`` URIs. Other ids, like http(s) URLs, could
        belong to eg Web or ActivityPub.

        Implementations should be a quick guess without expensive side
        effects, eg no external HTTP fetches of the id itself and no other
        discovery. They should return False if the id's domain is in
        :const:`common.DOMAIN_BLOCKLIST`.

        Args:
          id (str)

        Returns:
          bool or None: None if it's unclear whether this protocol owns the id
        """
        return False
191

192
    @classmethod
    def owns_handle(cls, handle, allow_internal=False):
        """Guesses whether this protocol owns a handle.

        To be implemented by subclasses. This base implementation always
        returns False and ignores ``allow_internal``.

        Handles are human-chosen, human-meaningful strings, often but not
        always unique. IDs, by contrast, uniquely identify users and are meant
        primarily for machines.

        For some protocols the handle format is more or less conclusive, eg
        ActivityPub (technically WebFinger) handles look like
        ``@user@instance.com``. Others, like domains, could belong to eg Web,
        ActivityPub, AT Protocol, or others.

        Implementations should be a quick guess without expensive side
        effects, eg no external HTTP fetches and no other discovery.

        Args:
          handle (str)
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          bool or None: None if it's unclear whether this protocol owns the
          handle
        """
        return False
220

221
    @classmethod
    def handle_to_id(cls, handle):
        """Resolves a handle to the corresponding id.

        To be implemented by subclasses; this base implementation only raises.

        Implementations may make network requests, eg DNS queries or HTTP
        requests.

        Args:
          handle (str)

        Returns:
          str: corresponding id, or None if the handle can't be found

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
236

237
    @classmethod
    def key_for(cls, id):
        """Builds the :class:`google.cloud.ndb.Key` of the :class:`models.User` for an id.

        When called on :class:`Protocol` itself, the protocol is inferred with
        :meth:`for_id` and the call is forwarded to it. When called on a
        concrete subclass, that subclass is used as is.

        Args:
          id (str)

        Returns:
          google.cloud.ndb.Key: matching key, or None if no protocol could be
          inferred or if the stored user has a ``status`` set
        """
        if cls == Protocol:
            owner = Protocol.for_id(id)
            if not owner:
                return None
            return owner.key_for(id)

        # load user so that we follow use_instead
        user = cls.get_by_id(id, allow_opt_out=True)
        if not user:
            # nothing stored yet; derive the key from an unsaved entity
            return cls(id=id).key

        return None if user.status else user.key
263

264
    @cached(LRUCache(20000), lock=Lock())
    @staticmethod
    def for_id(id, remote=True):
        """Determines which protocol an id belongs to.

        Tries progressively more expensive strategies and stops at the first
        one that gives an answer:

        1. the id is a non-homepage URL on one of our per-protocol subdomains
        2. a protocol's :meth:`owns_id` claims the id, or exactly one protocol
           doesn't rule it out
        3. an :class:`Object` with this id is already stored and has a
           ``source_protocol``
        4. if ``remote`` is True, one of the remaining candidate protocols
           loads the id over the network

        Return values, including None, are memoized by call arguments in an
        LRU cache.

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.info(f'Determining protocol for id {id}')
        if not id:
            return None

        # step 1: check for our per-protocol subdomains
        if util.is_web(id):
            is_homepage = urlparse(id).path.strip('/') == ''
            by_subdomain = Protocol.for_bridgy_subdomain(id)
            if by_subdomain and not is_homepage:
                logger.info(f'  {by_subdomain.LABEL} owns id {id}')
                return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it.
        # PROTOCOLS may map several labels to one class, so dedupe; sort to be
        # deterministic
        candidates = []
        distinct = {p for p in PROTOCOLS.values() if p}
        for protocol in sorted(distinct, key=lambda p: p.LABEL):
            verdict = protocol.owns_id(id)
            if verdict:
                logger.info(f'  {protocol.LABEL} owns id {id}')
                return protocol
            if verdict is not False:
                # None means "maybe"; keep it for the later steps
                candidates.append(protocol)

        if len(candidates) == 1:
            logger.info(f'  {candidates[0].LABEL} owns id {id}')
            return candidates[0]

        # step 3: look for existing Objects in the datastore
        obj = Protocol.load(id, remote=False)
        if obj and obj.source_protocol:
            logger.info(f'  {obj.key} owned by source_protocol {obj.source_protocol}')
            return PROTOCOLS[obj.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for protocol in candidates:
            logger.info(f'Trying {protocol.LABEL}')
            try:
                if protocol.load(id, local=False, remote=True):
                    logger.info(f'  {protocol.LABEL} owns id {id}')
                    return protocol
            except werkzeug.exceptions.BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except werkzeug.exceptions.HTTPException:
                # internal error we generated ourselves; try next protocol
                continue
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
339

340
    @staticmethod
    def for_handle(handle):
        """Determines which protocol a handle belongs to.

        Strategies, in order: ask each protocol's :meth:`owns_handle`; look
        for a stored user with this handle among the undecided protocols; and
        finally ask those protocols to resolve the handle with
        :meth:`handle_to_id`, which may go over the network.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle,
          or if the stored user with this handle has a ``status`` set
        """
        # TODO: normalize, eg convert domains to lower case
        logger.info(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: check if any Protocols say conclusively that they own it.
        # sort to be deterministic.
        candidates = []
        distinct = {p for p in PROTOCOLS.values() if p}
        for proto in sorted(distinct, key=lambda p: p.LABEL):
            verdict = proto.owns_handle(handle)
            if verdict:
                logger.info(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)
            if verdict is not False:
                # None means "maybe"; keep it for the later steps
                candidates.append(proto)

        if len(candidates) == 1:
            logger.info(f'  {candidates[0].LABEL} owns handle {handle}')
            return (candidates[0], None)

        # step 2: look for matching User in the datastore
        for proto in candidates:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue
            if user.status:
                return (None, None)
            logger.info(f'  user {user.key} owns handle {handle}')
            return (proto, user.key.id())

        # step 3: resolve handle to id
        for proto in candidates:
            id = proto.handle_to_id(handle)
            if id:
                logger.info(f'  {proto.LABEL} resolved handle {handle} to id {id}')
                return (proto, id)

        return (None, None)
393

394
    @classmethod
    def bridged_web_url_for(cls, user):
        """Gives the web URL of a user's bridged profile in this protocol.

        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``

        This base implementation has no such URL and always returns None.

        Args:
          user (models.User)

        Returns:
          str, or None if there isn't a canonical URL
        """
        return None
408

409
    @classmethod
    def actor_key(cls, obj):
        """Finds the :class:`User` key for an object's author or actor.

        Args:
          obj (models.Object)

        Returns:
          google.cloud.ndb.key.Key or None: None if the object's AS1 has no
          owner, or if :meth:`key_for` returns None for it
        """
        owner = as1.get_owner(obj.as1)
        if not owner:
            return None
        return cls.key_for(owner)
422

423
    @classmethod
    def bot_user_id(cls):
        """Gives the Web user id of this protocol's bot user.

        The id is this protocol's ``ABBREV`` followed by
        :const:`common.SUPERDOMAIN`, for example ``'bsky.brid.gy'`` for ATProto.

        Returns:
          str:
        """
        bot_id = f'{cls.ABBREV}{common.SUPERDOMAIN}'
        return bot_id
433

434
    @classmethod
    def create_for(cls, user):
        """Creates a copy user in this protocol for a user from another one.

        To be provided by subclasses; this base implementation only raises.
        Implementations should add the copy user to :attr:`copies`.

        Args:
          user (models.User): original source user. Shouldn't already have a
            copy user for this protocol in :attr:`copies`.

        Raises:
          ValueError: if we can't create a copy of the given user in this protocol
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
448

449
    @classmethod
    def send(to_cls, obj, url, from_user=None, orig_obj=None):
        """Delivers an outgoing activity to a destination in this protocol.

        To be implemented by subclasses; this base implementation only raises.

        Args:
          obj (models.Object): with activity to send
          url (str): destination URL to send to
          from_user (models.User): user (actor) this activity is from
          orig_obj (models.Object): the "original object" that this object
            refers to, eg replies to or reposts or likes

        Returns:
          bool: True if the activity is sent successfully, False if it is
          ignored or otherwise unsent due to protocol logic, eg no webmention
          endpoint, protocol doesn't support the activity type. (Failures are
          raised as exceptions.)

        Raises:
          werkzeug.HTTPException if the request fails
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
472

473
    @classmethod
    def fetch(cls, obj, **kwargs):
        """Fetches a protocol-specific object into an :class:`Object`.

        To be implemented by subclasses; this base implementation only raises.

        Errors are raised as exceptions. A False return value means the fetch
        neither failed nor succeeded, eg the id isn't valid for this protocol,
        or the fetch didn't return valid data for this protocol.

        Args:
          obj (models.Object): with the id to fetch. Data is filled into one of
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
          kwargs: subclass-specific

        Returns:
          bool: True if the object was fetched and populated successfully,
          False otherwise

        Raises:
          werkzeug.HTTPException: if the fetch fails
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
496

497
    @classmethod
    def convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
        and ``application/activity+json`` for :class:`ActivityPub`.

        First does protocol-independent preprocessing: for actor objects that
        come from a different protocol, sets ``obj.our_as1`` to a copy of
        ``obj.as1`` with ``objectType`` ``application`` and a "bridged by
        Bridgy Fed" disclaimer appended to ``summary``. Then passes through to
        :meth:`_convert`.

        Note that this modifies ``obj`` in place when the disclaimer is added.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific, passed through to :meth:`_convert`

        Returns:
          converted object in the protocol's native format, often a dict
        """
        # the object's id: its key's id if it has a key, otherwise its AS1 id,
        # otherwise None
        id = None
        if obj:
            id = obj.key.id() if obj.key else obj.as1.get('id') if obj.as1 else None

        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
        if (from_user and obj and obj.as1
            and obj.as1.get('objectType') in as1.ACTOR_TYPES
            and PROTOCOLS.get(obj.source_protocol) != cls
            # NOTE(review): for_bridgy_subdomain is documented to return a
            # Protocol subclass or None. If DOMAINS holds domain strings, this
            # clause is always true and filters nothing. Confirm the intent.
            and Protocol.for_bridgy_subdomain(id) not in DOMAINS
            # skip summaries that already carry a disclaimer
            and 'Bridgy Fed]' not in html_to_text(obj.as1.get('summary', ''))
            # Web users are special cased, they don't get the label if they've
            # explicitly enabled Bridgy Fed with redirects or webmentions
            and not (from_user.LABEL == 'web'
                     and (from_user.last_webmention_in or from_user.has_redirects))):
            # work on a copy so that obj.as1's underlying data isn't modified
            obj.our_as1 = copy.deepcopy(obj.as1)
            obj.our_as1['objectType'] = 'application'

            if from_user.key and id == from_user.profile_id():
                # obj is from_user's own profile: link to their user page
                disclaimer = f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a> from <a href="{from_user.web_url()}">{from_user.handle_or_id()}</a> by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
            else:
                # some other actor: describe the source with whatever the
                # object itself provides, if anything
                url = as1.get_url(obj.our_as1) or id
                name = obj.our_as1.get('displayName') or obj.our_as1.get('username')
                source = (util.pretty_link(url, text=name) if url
                          else name if name
                          else '')
                if source:
                    source = ' from ' + source
                disclaimer = f'[bridged{source} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'

            # append the disclaimer, separated from any existing bio text
            summary = obj.our_as1.setdefault('summary', '')
            if not summary.endswith(disclaimer):
                if summary:
                    obj.our_as1['summary'] += '<br><br>'
                obj.our_as1['summary'] += disclaimer

        return cls._convert(obj, from_user=from_user, **kwargs)
552

553
    @classmethod
    def _convert(cls, obj, from_user=None, **kwargs):
        """Does the protocol-specific part of :meth:`convert`.

        To be implemented by subclasses; this base implementation only raises.
        Implementations should generally call :meth:`Protocol.translate_ids`
        (as their own class) before converting to their format.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific

        Returns:
          converted object in the protocol's native format, often a dict. May
            return the ``{}`` empty dict if the object can't be converted.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
571

572
    @classmethod
    def target_for(cls, obj, shared=False):
        """Finds the delivery target (endpoint) for an :class:`Object`.

        To be implemented by subclasses; this base implementation only raises.

        Examples:

        * If obj has ``source_protocol`` ``web``, returns its URL, as a
          webmention target.
        * If obj is an ``activitypub`` actor, returns its inbox.
        * If obj is an ``activitypub`` object, returns its author's or actor's
          inbox.

        Args:
          obj (models.Object):
          shared (bool): optional. If True, returns a common/shared
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
            multiple recipients for efficiency

        Returns:
          str: target endpoint, or None if not available.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
596

597
    @classmethod
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        Default implementation here, subclasses may override.

        The URL's domain is checked against :const:`common.DOMAIN_BLOCKLIST`
        and, unless ``allow_internal`` is set, also against our own
        :const:`common.DOMAINS`. Matching is delegated to
        ``util.domain_or_parent_in``.

        Args:
          url (str):
          allow_internal (bool): if True, our own domains like
            ``fed.brid.gy``, ``bsky.brid.gy``, etc are not treated as blocked

        Returns:
          bool:
        """
        # build a fresh sequence rather than using ``+=`` on the module-level
        # blocklist: if that object is a mutable list, ``+=`` would extend it
        # in place, so DOMAINS would stay in it for all later calls, including
        # ones with allow_internal=True
        if allow_internal:
            blocklist = DOMAIN_BLOCKLIST
        else:
            blocklist = (*DOMAIN_BLOCKLIST, *DOMAINS)

        return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
614

615
    @classmethod
    def translate_ids(to_cls, obj):
        """Rewrites every id in an AS1 object into ``to_cls``'s id space.

        The source protocol is inferred separately for each id, with
        :meth:`for_id`. Ids whose protocol can't be determined, or that already
        belong to ``to_cls``, are left alone.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``tags.[objectType=mention].url``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        Args:
          obj (dict): AS1 object or activity (not :class:`models.Object`!)

        Returns:
          dict: wrapped version of ``obj``. The input itself is not modified;
          a falsy input is returned as is.
        """
        assert to_cls != Protocol
        if not obj:
            return obj

        outer_obj = copy.deepcopy(obj)
        inner_obj = as1.get_object(outer_obj)
        outer_obj['object'] = inner_obj

        def rewrite(container, field, convert_id):
            """Translates ``container[field]``'s id in place with ``convert_id``."""
            # normalize the field to an object so that a bare string id and
            # an {'id': ...} dict are handled the same way
            value = as1.get_object(container, field)
            container[field] = value

            if id := value.get('id'):
                from_cls = Protocol.for_id(id)
                # TODO: what if from_cls is None? relax translate_object_id,
                # make it a noop if we don't know enough about from/to?
                if from_cls and from_cls != to_cls:
                    value['id'] = convert_id(id=id, from_=from_cls, to=to_cls)

            # collapse an object that only has an id back to a bare string
            if value.keys() == {'id'}:
                container[field] = value['id']

        outer_type = as1.object_type(outer_obj)
        if outer_type in as1.ACTOR_TYPES:
            rewrite(outer_obj, 'id', translate_user_id)
        else:
            rewrite(outer_obj, 'id', translate_object_id)

        # the object of a follow or unfollow is a user, whatever its type says
        inner_is_actor = (as1.object_type(inner_obj) in as1.ACTOR_TYPES
                          or outer_type in ('follow', 'stop-following'))
        if inner_is_actor:
            rewrite(inner_obj, 'id', translate_user_id)
        else:
            rewrite(inner_obj, 'id', translate_object_id)

        for level in (outer_obj, inner_obj):
            rewrite(level, 'inReplyTo', translate_object_id)
            for owner_field in ('actor', 'author'):
                rewrite(level, owner_field, translate_user_id)
            for tag in as1.get_objects(level, 'tags'):
                if tag.get('objectType') == 'mention':
                    rewrite(tag, 'url', translate_user_id)

        outer_obj = util.trim_nulls(outer_obj)
        # collapse an inner object that only has an id back to a bare string
        if outer_obj.get('object', {}).keys() == {'id'}:
            outer_obj['object'] = inner_obj['id']

        return outer_obj
691

692
    @classmethod
1✔
693
    def receive(from_cls, obj, authed_as=None, internal=False):
1✔
694
        """Handles an incoming activity.
695

696
        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
697
        unset, raises :class:`werkzeug.exceptions.BadRequest`.
698

699
        Args:
700
          obj (models.Object)
701
          authed_as (str): authenticated actor id who sent this activity
702
          internal (bool): whether to allow activity ids on internal domains
703

704
        Returns:
705
          (str, int) tuple: (response body, HTTP status code) Flask response
706

707
        Raises:
708
          werkzeug.HTTPException: if the request is invalid
709
        """
710
        # check some invariants
711
        assert from_cls != Protocol
1✔
712
        assert isinstance(obj, Object), obj
1✔
713
        logger.info(f'From {from_cls.LABEL}: {obj.key} AS1: {json_dumps(obj.as1, indent=2)}')
1✔
714

715
        if not obj.as1:
1✔
716
            error('No object data provided')
×
717

718
        id = None
1✔
719
        if obj.key and obj.key.id():
1✔
720
            id = obj.key.id()
1✔
721

722
        if not id:
1✔
723
            id = obj.as1.get('id')
1✔
724
            obj.key = ndb.Key(Object, id)
1✔
725

726
        if not id:
1✔
727
            error('No id provided')
×
728
        elif from_cls.is_blocklisted(id, allow_internal=internal):
1✔
729
            error(f'Activity {id} is blocklisted')
1✔
730

731
        # short circuit if we've already seen this activity id.
732
        # (don't do this for bare objects since we need to check further down
733
        # whether they've been updated since we saw them last.)
734
        if obj.as1.get('objectType') == 'activity' and 'force' not in request.values:
1✔
735
            with seen_ids_lock:
1✔
736
                already_seen = id in seen_ids
1✔
737
                seen_ids[id] = True
1✔
738

739
            if (already_seen
1✔
740
                    or (obj.new is False and obj.changed is False)
741
                    or (obj.new is None and obj.changed is None
742
                        and from_cls.load(id, remote=False))):
743
                msg = f'Already handled this activity {id}'
1✔
744
                logger.info(msg)
1✔
745
                return msg, 204
1✔
746

747
        # load actor user, check authorization
748
        actor = as1.get_owner(obj.as1)
1✔
749
        if not actor:
1✔
750
            error('Activity missing actor or author', status=400)
1✔
751
        elif from_cls.owns_id(actor) is False:
1✔
752
            error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)
1✔
753

754
        if authed_as:
1✔
755
            assert isinstance(authed_as, str)
1✔
756
            authed_as = normalize_user_id(id=authed_as, proto=from_cls)
1✔
757
            actor = normalize_user_id(id=actor, proto=from_cls)
1✔
758
            if actor != authed_as:
1✔
759
                logger.warning(f"Auth: actor {actor} isn't authed user {authed_as}")
1✔
760
        else:
761
            logger.warning(f"Auth: missing authed_as!")
1✔
762

763
        # update copy ids to originals
764
        obj.resolve_ids()
1✔
765

766
        if (obj.type == 'follow'
1✔
767
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
768
            # refresh profile for follows of bot users to re-check whether
769
            # they're opted out
770
            logger.info(f'Follow of bot user, reloading {actor}')
1✔
771
            from_cls.load(actor, remote=True)
1✔
772
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
773
        else:
774
            # load actor user
775
            from_user = from_cls.get_or_create(id=actor)
1✔
776

777
        if not from_user or from_user.manual_opt_out:
1✔
778
            error(f'Actor {actor} is opted out or blocked', status=204)
1✔
779

780
        # write Object to datastore
781
        orig = obj
1✔
782
        obj = Object.get_or_create(id, authed_as=actor, **orig.to_dict())
1✔
783
        if orig.new is not None:
1✔
784
            obj.new = orig.new
1✔
785
        if orig.changed is not None:
1✔
786
            obj.changed = orig.changed
1✔
787

788
        # if this is a post, ie not an activity, wrap it in a create or update
789
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
1✔
790
        obj.add('users', from_user.key)
1✔
791

792
        if obj.type not in SUPPORTED_TYPES:
1✔
793
            error(f'Sorry, {obj.type} activities are not supported yet.', status=501)
1✔
794

795
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
796
        if obj.as1.get('verb') in ('post', 'update', 'delete'):
1✔
797
            if inner_owner := as1.get_owner(inner_obj_as1):
1✔
798
                if inner_owner_key := from_cls.key_for(inner_owner):
1✔
799
                    obj.add('users', inner_owner_key)
1✔
800

801
        obj.source_protocol = from_cls.LABEL
1✔
802
        obj.put()
1✔
803

804
        # store inner object
805
        inner_obj_id = inner_obj_as1.get('id')
1✔
806
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
1✔
807
            Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
1✔
808
                                 source_protocol=from_cls.LABEL, authed_as=actor)
809

810
        actor = as1.get_object(obj.as1, 'actor')
1✔
811
        actor_id = actor.get('id')
1✔
812

813
        # handle activity!
814

815
        # accept, eg in response to a follow. only send if the destination
816
        # supports accepts.
817
        if obj.type == 'accept':
1✔
818
            to_cls = Protocol.for_id(inner_obj_id)
1✔
819
            if not to_cls or not to_cls.HAS_FOLLOW_ACCEPTS:
1✔
820
                return 'OK'  # noop
1✔
821

822
        elif obj.type == 'stop-following':
1✔
823
            # TODO: unify with handle_follow?
824
            # TODO: handle multiple followees
825
            if not actor_id or not inner_obj_id:
1✔
826
                error(f'Undo of Follow requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
×
827

828
            # deactivate Follower
829
            from_ = from_cls.key_for(actor_id)
1✔
830
            to_cls = Protocol.for_id(inner_obj_id)
1✔
831
            to = to_cls.key_for(inner_obj_id)
1✔
832
            follower = Follower.query(Follower.to == to,
1✔
833
                                      Follower.from_ == from_,
834
                                      Follower.status == 'active').get()
835
            if follower:
1✔
836
                logger.info(f'Marking {follower} inactive')
1✔
837
                follower.status = 'inactive'
1✔
838
                follower.put()
1✔
839
            else:
840
                logger.warning(f'No Follower found for {from_} => {to}')
1✔
841

842
            # fall through to deliver to followee
843
            # TODO: do we convert stop-following to webmention 410 of original
844
            # follow?
845

846
        elif obj.type in ('update', 'like', 'share'):  # require object
1✔
847
            if not inner_obj_id:
1✔
848
                error("Couldn't find id of object to update")
1✔
849

850
            # fall through to deliver to followers
851

852
        elif obj.type == 'delete':
1✔
853
            if not inner_obj_id:
1✔
854
                error("Couldn't find id of object to delete")
×
855

856
            logger.info(f'Marking Object {inner_obj_id} deleted')
1✔
857
            Object.get_or_create(inner_obj_id, deleted=True, authed_as=authed_as)
1✔
858

859
            # if this is an actor, deactivate its followers/followings
860
            # https://github.com/snarfed/bridgy-fed/issues/63
861
            deleted_user = from_cls.key_for(id=inner_obj_id)
1✔
862
            if deleted_user:
1✔
863
                logger.info(f'Deactivating Followers from or to = {inner_obj_id}')
1✔
864
                followers = Follower.query(OR(Follower.to == deleted_user,
1✔
865
                                              Follower.from_ == deleted_user)
866
                                           ).fetch()
867
                for f in followers:
1✔
868
                    f.status = 'inactive'
1✔
869
                ndb.put_multi(followers)
1✔
870

871
            # fall through to deliver to followers
872

873
        elif obj.type == 'block':
1✔
874
            proto = Protocol.for_bridgy_subdomain(inner_obj_id)
1✔
875
            if not proto:
1✔
876
                logger.info("Ignoring block, target isn't one of our protocol domains")
×
877
                return 'OK', 200
×
878

879
            from_user.disable_protocol(proto)
1✔
880
            proto.maybe_delete_copy(from_user)
1✔
881
            return 'OK', 200
1✔
882

883
        elif obj.type == 'post':
1✔
884
            to_cc = (as1.get_ids(inner_obj_as1, 'to')
1✔
885
                     + as1.get_ids(inner_obj_as1, 'cc'))
886
            if len(to_cc) == 1 and to_cc != [as2.PUBLIC_AUDIENCE]:
1✔
887
                proto = Protocol.for_bridgy_subdomain(to_cc[0])
1✔
888
                if proto:
1✔
889
                    # remove @-mentions of bot user in HTML links
890
                    soup = util.parse_html(inner_obj_as1.get('content', ''))
1✔
891
                    for link in soup.find_all('a'):
1✔
892
                        link.extract()
1✔
893
                    content = soup.get_text().strip().lower()
1✔
894
                    logger.info(f'got DM to {to_cc}: {content}')
1✔
895
                    if content in ('yes', 'ok'):
1✔
896
                        from_user.enable_protocol(proto)
1✔
897
                        proto.bot_follow(from_user)
1✔
898
                    elif content == 'no':
1✔
899
                        from_user.disable_protocol(proto)
1✔
900
                        proto.maybe_delete_copy(from_user)
1✔
901
                    return 'OK', 200
1✔
902

903
        # fetch actor if necessary
904
        if actor and actor.keys() == set(['id']):
1✔
905
            logger.info('Fetching actor so we have name, profile photo, etc')
1✔
906
            actor_obj = from_cls.load(actor['id'])
1✔
907
            if actor_obj and actor_obj.as1:
1✔
908
                obj.our_as1 = {**obj.as1, 'actor': actor_obj.as1}
1✔
909

910
        # fetch object if necessary so we can render it in feeds
911
        if (obj.type == 'share'
1✔
912
                and inner_obj_as1.keys() == set(['id'])
913
                and from_cls.owns_id(inner_obj_id)):
914
            logger.info('Fetching object so we can render it in feeds')
1✔
915
            inner_obj = from_cls.load(inner_obj_id)
1✔
916
            if inner_obj and inner_obj.as1:
1✔
917
                obj.our_as1 = {
1✔
918
                    **obj.as1,
919
                    'object': {
920
                        **inner_obj_as1,
921
                        **inner_obj.as1,
922
                    }
923
                }
924

925
        if obj.type == 'follow':
1✔
926
            proto = Protocol.for_bridgy_subdomain(inner_obj_id)
1✔
927
            if proto:
1✔
928
                # follow of one of our protocol bot users; enable that protocol.
929
                # foll through so that we send an accept.
930
                from_user.enable_protocol(proto)
1✔
931
                proto.bot_follow(from_user)
1✔
932

933
            from_cls.handle_follow(obj)
1✔
934

935
        # deliver to targets
936
        return from_cls.deliver(obj, from_user=from_user)
1✔
937

938
    @classmethod
    def handle_follow(from_cls, obj):
        """Handles an incoming follow activity.

        Sends an ``Accept`` back, but doesn't send the ``Follow`` itself. That
        happens in :meth:`deliver`.

        Loads the follower (the activity's ``actor``) and each followee (the
        activity's ``object`` field), stores a :class:`Follower` for each
        cross-protocol pair, adds each followee's key to ``obj``'s ``notify``
        property, and calls :meth:`maybe_accept_follow` for each followee.
        Followees on the same protocol as the follower are skipped.

        Args:
          obj (models.Object): follow activity
        """
        logger.info('Got follow. Loading users, storing Follow(s), sending accept(s)')

        # Prepare follower (from) users' data
        # NOTE(review): error() is used below as if it aborts the request
        # (presumably by raising); confirm against common.error.
        from_as1 = as1.get_object(obj.as1, 'actor')
        from_id = from_as1.get('id')
        if not from_id:
            error(f'Follow activity requires actor. Got: {obj.as1}')

        from_obj = from_cls.load(from_id)
        if not from_obj:
            error(f"Couldn't load {from_id}")

        if not from_obj.as1:
            # we don't have any data for the follower yet; fall back to
            # whatever the follow activity itself included for the actor
            from_obj.our_as1 = from_as1
            from_obj.put()

        from_key = from_cls.key_for(from_id)
        if not from_key:
            error(f'Invalid {from_cls} user key: {from_id}')
        obj.users = [from_key]
        from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls and from_cls.LABEL != 'fake':
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_obj = to_cls.load(to_id)
            if to_obj and not to_obj.as1:
                to_obj.our_as1 = to_as1
                to_obj.put()

            to_key = to_cls.key_for(to_id)
            if not to_key:
                # report the followee's protocol and id; this is the key that
                # couldn't be generated, not the follower's
                logger.info(f'Skipping invalid {to_cls} user key: {to_id}')
                continue

            # If followee user is already direct, follower may not know they're
            # interacting with a bridge. if followee user is indirect though,
            # follower should know, so they're direct.
            to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj, direct=False)
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')
            obj.add('notify', to_key)
            from_cls.maybe_accept_follow(follower=from_user, followee=to_user,
                                         follow=obj)
1009

1010
    @classmethod
    def maybe_accept_follow(_, follower, followee, follow):
        """Generates and sends an accept activity for a follow.

        Does nothing if the followee's ``HAS_FOLLOW_ACCEPTS`` is truthy.
        Otherwise, creates an accept :class:`models.Object` from the followee,
        sends it to the follower's delivery target, and if the send succeeds,
        marks the accept as delivered and complete.

        Args:
          follower: :class:`models.User`
          followee: :class:`models.User`
          follow: :class:`models.Object`
        """
        if followee.HAS_FOLLOW_ACCEPTS:
            return

        # send accept. note that this is one accept for the whole
        # follow, even if it has multiple followees!
        followee_id = followee.key.id()
        accept_id = f'{followee_id}/followers#accept-{follow.key.id()}'
        accept_as1 = {
            'id': accept_id,
            'objectType': 'activity',
            'verb': 'accept',
            'actor': followee_id,
            'object': follow.as1,
        }
        accept = Object.get_or_create(accept_id, our_as1=accept_as1)

        from_target = follower.target_for(follower.obj)
        if not from_target:
            error(f"Couldn't find delivery target for follower {follower}")

        if follower.send(accept, from_target, from_user=followee):
            delivered_to = Target(protocol=follower.LABEL, uri=from_target)
            accept.populate(delivered=[delivered_to], status='complete')
            accept.put()
1046

1047
    @classmethod
    def bot_follow(bot_cls, user):
        """Follows a user from this protocol's bot user.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub).

        Stores a new follow activity :class:`models.Object` from the bot user
        and enqueues a ``send`` task to deliver it to the user's target.

        Args:
          user (User)
        """
        from web import Web

        bot = Web.get_by_id(bot_cls.bot_user_id())
        now = util.now().isoformat()
        bot_id = bot.key.id()
        user_id = user.key.id()
        logger.info(f'Following {user_id} back from bot user {bot_id}')

        target = user.target_for(user.obj)
        follow_back_id = f'https://{bot_id}/#follow-back-{user_id}-{now}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_back_id,
            'actor': bot_id,
            'object': user_id,
        }
        pending = [Target(protocol=user.LABEL, uri=target)]
        # put() returns the stored entity's key, which is what the task needs
        follow_back_key = Object(id=follow_back_id, source_protocol='web',
                                 undelivered=pending, our_as1=follow_as1).put()

        common.create_task(queue='send', obj=follow_back_key.urlsafe(),
                           url=target, protocol=user.LABEL,
                           user=bot.key.urlsafe())
1077

1078
    @classmethod
    def maybe_delete_copy(copy_cls, user):
        """Deletes a user's copy actor in a given protocol.

        ...if ``copy_cls`` 's :attr:`Protocol.HAS_COPIES` is True. Otherwise,
        does nothing. Also does nothing, other than logging a warning, if the
        user has no copy in ``copy_cls``.

        Stores a delete activity :class:`models.Object` from the user and
        enqueues a ``send`` task to deliver it to the copy's target.

        TODO: this should eventually go through receive for protocols that need
        to deliver to all followers' targets, eg AP.

        Args:
          user (User)
        """
        if not copy_cls.HAS_COPIES:
            return

        copy_user_id = user.get_copy(copy_cls)
        if not copy_user_id:
            logger.warning(f"Tried to delete {user.key} copy for {copy_cls.LABEL}, which doesn't exist!")
            return

        now = util.now().isoformat()
        user_id = user.key.id()
        delete_id = f'{user_id}#delete-copy-{copy_cls.LABEL}-{now}'
        delete_as1 = {
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': user_id,
            'object': copy_user_id,
        }

        target_uri = copy_cls.target_for(Object(id=copy_user_id))
        delete = Object(
            id=delete_id, source_protocol=user.LABEL, our_as1=delete_as1,
            undelivered=[Target(protocol=copy_cls.LABEL, uri=target_uri)])
        delete.put()

        common.create_task(queue='send', obj=delete.key.urlsafe(),
                           url=target_uri, protocol=copy_cls.LABEL,
                           user=user.key.urlsafe())
1116

1117
    @classmethod
    def handle_bare_object(cls, obj, authed_as=None):
        """Wraps a bare object in a create or update activity, if necessary.

        Only actors, notes, articles, and comments are wrapped; anything else
        is returned as is. Actors and changed objects get a new update
        activity. Other objects get a create (post) activity if they're new,
        if no completed create for them is stored yet, or if ``force`` is in
        the request's form params. Otherwise, calls :func:`error` with
        status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        obj_actor = as1.get_owner(obj.as1)
        now = util.now().isoformat()
        obj_id = obj.key.id()

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            if obj.changed:
                logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')

            # Mastodon requires the updated field for Updates, so add a
            # default value, which the object's own value overrides if set.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            updated_obj = {'updated': now, **obj.as1}
            update_id = f'{obj_id}#bridgy-fed-update-{now}'
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': obj_actor,
                'object': updated_obj,
            }
            logger.info(f'Wrapping in update: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        create_id = f'{obj_id}#bridgy-fed-create'
        create = cls.load(create_id, remote=False)
        if (obj.new or not create or create.status != 'complete'
                # HACK: force query param here is specific to webmention
                or 'force' in request.form):
            if not create:
                logger.info('No existing create activity')
            else:
                logger.info(f'Existing create {create.key} status {create.status}')

            create_as1 = {
                'objectType': 'activity',
                'verb': 'post',
                'id': create_id,
                'actor': obj_actor,
                'object': obj.as1,
                'published': now,
            }
            logger.info(f'Wrapping in post: {json_dumps(create_as1, indent=2)}')
            return Object.get_or_create(create_id, our_as1=create_as1,
                                        source_protocol=obj.source_protocol,
                                        authed_as=authed_as)

        error(f'{obj.key.id()} is unchanged, nothing to do', status=204)
1184

1185
    @classmethod
    def deliver(from_cls, obj, from_user):
        """Delivers an activity to its external recipients.

        Finds the activity's targets with :meth:`targets`, records them in
        ``obj.undelivered``, and enqueues one ``send`` task per target. If
        there are no targets, marks ``obj`` as ignored and calls
        :func:`error` with status 204.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from

        Returns:
          (str, int) tuple: ``('OK', 202)`` once the send tasks are enqueued
        """
        # find delivery targets. maps Target to Object or None
        targets = from_cls.targets(obj, from_user=from_user)

        if not targets:
            obj.status = 'ignored'
            obj.put()
            error(r'No targets, nothing to do ¯\_(ツ)_/¯', status=204)

        # sort targets so order is deterministic for tests, debugging, etc
        sorted_targets = sorted(targets.items(), key=lambda item: item[0].uri)
        pending = [target for target, _ in sorted_targets]
        obj.populate(status='in progress', delivered=[], failed=[],
                     undelivered=pending)
        obj.put()
        logger.info(f'Delivering to: {obj.undelivered}')

        # enqueue send task for each targets
        user = from_user.key.urlsafe()
        obj_key = obj.key.urlsafe()
        for target, orig_obj in sorted_targets:
            orig_obj_key = orig_obj.key.urlsafe() if orig_obj else ''
            common.create_task(queue='send', obj=obj_key,
                               url=target.uri, protocol=target.protocol,
                               orig_obj=orig_obj_key, user=user)

        return 'OK', 202
1221

1222
    @classmethod
    def targets(cls, obj, from_user):
        """Collects the targets to send a :class:`models.Object` to.

        Targets are both objects - original posts, events, etc - and actors.

        Works in three passes:

        1. Direct targets: the recipients and in-reply-to objects that
           ``as1.targets`` returns for the activity, skipping same-protocol
           targets and mentions of in-reply-to authors.
        2. Followers: for posts, updates, deletes, and shares that aren't
           replies (or are self replies to bridged posts), the targets of the
           actor's active followers. Also adds the relevant inner object to
           those followers' feeds.
        3. De-dupes target URLs and drops targets on the same domain as the
           activity itself.

        As a side effect, adds recipient users to ``obj``'s ``notify``
        property.

        Args:
          obj (models.Object)
          from_user (User)

        Returns:
          dict: maps :class:`models.Target` to original (in response to)
          :class:`models.Object`, if any, otherwise None
        """
        logger.info('Finding recipients and their targets')

        # sorted and de-duped once here, so iteration below is deterministic
        target_uris = sorted(set(as1.targets(obj.as1)))
        logger.info(f'Raw targets: {target_uris}')
        orig_obj = None
        targets = {}  # maps Target to Object or None
        owner = as1.get_owner(obj.as1)

        in_reply_to_protocols = set()  # protocol kinds, eg 'MagicKey'
        in_reply_to_owners = []
        in_reply_tos = as1.get_ids(as1.get_object(obj.as1), 'inReplyTo')
        for in_reply_to in in_reply_tos:
            if proto := Protocol.for_id(in_reply_to):
                if in_reply_to_obj := proto.load(in_reply_to):
                    if proto.LABEL != obj.source_protocol:
                        in_reply_to_protocols.add(proto._get_kind())
                    else:
                        # NOTE(review): this branch previously also built a
                        # tuple of proto.DEFAULT_ENABLED_PROTOCOLS plus the
                        # copies' protocols, but never used it. Confirm
                        # whether default-enabled protocols should be
                        # included here too.
                        in_reply_to_protocols.update(
                            PROTOCOLS[c.protocol]._get_kind()
                            for c in in_reply_to_obj.copies)

                    if reply_owner := as1.get_owner(in_reply_to_obj.as1):
                        in_reply_to_owners.append(reply_owner)

        is_reply = obj.type == 'comment' or in_reply_tos
        is_self_reply = False
        if is_reply:
            logger.info(f"It's a reply...")
            if in_reply_to_protocols:
                logger.info(f'  ...delivering to these protocols where the in-reply-to post was native or bridged: {in_reply_to_protocols}')
            else:
                logger.info(f"  ...skipping, in-reply-to post(s) are same protocol and weren't bridged anywhere")
                return {}

        for target_id in target_uris:
            protocol = Protocol.for_id(target_id)
            if not protocol:
                logger.info(f"Can't determine protocol for {target_id}")
                continue
            elif protocol.is_blocklisted(target_id):
                logger.info(f'{target_id} is blocklisted')
                continue

            # note that orig_obj deliberately outlives this loop; it's reused
            # for reposts in the followers section below
            orig_obj = protocol.load(target_id)
            if not orig_obj or not orig_obj.as1:
                logger.info(f"Couldn't load {target_id}")
                continue

            # deliver self-replies to followers
            # https://github.com/snarfed/bridgy-fed/issues/639
            if owner == as1.get_owner(orig_obj.as1):
                is_self_reply = True
                logger.info(f'Looks like a self reply! Delivering to followers')

            if protocol == cls and cls.LABEL != 'fake':
                logger.info(f'Skipping same-protocol target {target_id}')
                continue
            elif target_id in in_reply_to_owners:
                logger.info(f'Skipping mention of in-reply-to author')
                continue

            target = protocol.target_for(orig_obj)
            if not target:
                # TODO: surface errors like this somehow?
                logger.error(f"Can't find delivery target for {target_id}")
                continue

            logger.info(f'Target for {target_id} is {target}')
            targets[Target(protocol=protocol.LABEL, uri=target)] = orig_obj
            orig_user = protocol.actor_key(orig_obj)
            if orig_user:
                logger.info(f'Recipient is {orig_user}')
                obj.add('notify', orig_user)

        logger.info(f'Direct targets: {targets.keys()}')

        # deliver to followers, if appropriate
        user_key = cls.actor_key(obj)
        if not user_key:
            logger.info("Can't tell who this is from! Skipping followers.")
            return targets

        if (obj.type in ('post', 'update', 'delete', 'share')
                and (not is_reply or (is_self_reply and in_reply_to_protocols))):
            logger.info(f'Delivering to followers of {user_key}')
            followers = [f for f in Follower.query(Follower.to == user_key,
                                                   Follower.status == 'active')
                         # skip protocol bot users
                         if not Protocol.for_bridgy_subdomain(f.from_.id())]
            user_keys = [f.from_ for f in followers]
            if is_reply:
                user_keys = [k for k in user_keys if k.kind() in in_reply_to_protocols]
            users = [u for u in ndb.get_multi(user_keys) if u]
            User.load_multi(users)

            # which object should we add to followers' feeds, if any
            feed_obj = None
            if obj.type == 'share':
                feed_obj = obj
            else:
                inner = as1.get_object(obj.as1)
                # don't add profile updates to feeds
                if not (obj.type == 'update'
                        and inner.get('objectType') in as1.ACTOR_TYPES):
                    inner_id = inner.get('id')
                    if inner_id:
                        feed_obj = cls.load(inner_id)

            # include ATProto if this user is enabled there.
            # TODO: abstract across protocols. maybe with this, below
            # targets.update({
            #     Target(protocol=proto.LABEL,
            #            uri=proto.target_for(proto.bot_user_id())): None
            #     for proto in PROTOCOLS
            #     if proto and proto.HAS_COPIES
            # })

            if 'atproto' in from_user.enabled_protocols:
                if (not followers and
                    (util.domain_or_parent_in(
                        util.domain_from_link(from_user.key.id()), LIMITED_DOMAINS)
                     or util.domain_or_parent_in(
                         util.domain_from_link(obj.key.id()), LIMITED_DOMAINS))):
                    logger.info(f'skipping ATProto, {from_user.key.id()} is on a limited domain and has no followers')

                elif (is_reply or obj.type == 'share') and not targets:
                    logger.info(f"skipping ATProto, repost of or reply to post that wasn't bridged")

                else:
                    from atproto import ATProto
                    targets.setdefault(
                        Target(protocol=ATProto.LABEL, uri=ATProto.PDS_URL), None)
                    logger.info(f'user has ATProto enabled, adding {ATProto.PDS_URL}')

            for user in users:
                if feed_obj:
                    feed_obj.add('feed', user.key)

                # TODO: should we pass remote=False through here to Protocol.load?
                target = user.target_for(user.obj, shared=True) if user.obj else None
                if not target:
                    # TODO: surface errors like this somehow?
                    logger.error(f'Follower {user.key} has no delivery target')
                    continue

                # normalize URL (lower case hostname, etc)
                # ...but preserve our PDS URL without trailing slash in path
                # https://atproto.com/specs/did#did-documents
                target = util.dedupe_urls([target], trailing_slash=False)[0]

                # HACK: use last target object from above for reposts, which
                # has its resolved id
                targets[Target(protocol=user.LABEL, uri=target)] = \
                    orig_obj if obj.type == 'share' else None

            if feed_obj:
                feed_obj.put()

        # de-dupe targets, discard same-domain
        # maps string target URL to (Target, Object) tuple
        candidates = {t.uri: (t, orig) for t, orig in targets.items()}
        # maps Target to Object or None
        targets = {}
        source_domains = [
            util.domain_from_link(url) for url in
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
            if util.is_web(url)
        ]
        for url in sorted(util.dedupe_urls(
                candidates.keys(),
                # preserve our PDS URL without trailing slash in path
                # https://atproto.com/specs/did#did-documents
                trailing_slash=False)):
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
                logger.info(f'Skipping same-domain target {url}')
            else:
                # use fresh names here so we don't clobber the obj parameter
                target, orig = candidates[url]
                targets[target] = orig

        return targets
1417

1418
    @classmethod
    def load(cls, id, remote=None, local=True, **kwargs):
        """Loads and returns an Object from memory cache, datastore, or HTTP fetch.

        Sets the :attr:`new` and :attr:`changed` attributes if we know either
        one for the loaded object, ie local is True and remote is True or None.

        Objects that are fetched over the network are stored back to the
        datastore and the in-memory cache before they're returned.

        Note that :meth:`Object._post_put_hook` updates the cache.

        Args:
          id (str)
          remote (bool): whether to fetch the object over the network. If True,
            fetches even if we already have the object stored, and updates our
            stored copy. If False and we don't have the object stored, returns
            None. Default (None) means to fetch over the network only if we
            don't already have it stored, or if our stored copy was last
            updated more than ``OBJECT_REFRESH_AGE`` ago.
          local (bool): whether to load from the datastore before
            fetching over the network. If False, still stores back to the
            datastore after a successful remote fetch.
          kwargs: passed through to :meth:`fetch()`

        Returns:
          models.Object: loaded object, or None if it isn't fetchable, eg a
          non-URL string for Web, if the fetched object is over
          ``models.MAX_ENTITY_SIZE`` bytes, or if ``remote`` is False and it
          isn't in the cache or datastore

        Raises:
          requests.HTTPError: anything that :meth:`fetch` raises
        """
        assert id
        assert local or remote is not False
        # logger.debug(f'Loading Object {id} local={local} remote={remote}')

        obj = orig_as1 = None
        with objects_cache_lock:
            cache_hit = objects_cache.get(id)
            if cache_hit:
                # make a copy so that if the client modifies this entity in
                # memory, those modifications aren't applied to the cache
                # until they explicitly put() the modified entity.
                # NOTE: keep in sync with Object._post_put_hook!
                # logger.debug('  got from cache')
                props = cache_hit.to_dict(
                    # computed properties
                    exclude=['as1', 'expire', 'object_ids', 'type'])
                obj = Object(id=cache_hit.key.id(), **props)

        if local and not obj:
            obj = Object.get_by_id(id)
            # only stored entities with actual contents count as known
            if obj and (obj.as1 or obj.raw or obj.deleted):
                # logger.debug('  got from datastore')
                obj.new = False
                if remote is not True:
                    with objects_cache_lock:
                        objects_cache[id] = obj

        if remote is False:
            return obj

        if remote is None and obj:
            refresh_cutoff = util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not obj.updated < refresh_cutoff:
                return obj
            # logger.debug(f'  last updated {obj.updated}, refreshing')

        if obj:
            # remember the old contents so we can tell below if they changed
            orig_as1 = obj.as1
            obj.clear()
            obj.new = False
        else:
            obj = Object(id=id)
            if local:
                # logger.debug('  not in datastore')
                obj.new = True
                obj.changed = False

        if not cls.fetch(obj, **kwargs):
            return None

        # https://stackoverflow.com/a/3042250/186123
        size = len(_entity_to_protobuf(obj)._pb.SerializeToString())
        if size > models.MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
            return None

        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(orig_as1)

        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        with objects_cache_lock:
            objects_cache[id] = obj
        return obj
1521

1522

1523
@cloud_tasks_only
def receive_task():
    """Task handler for a newly received :class:`models.Object`.

    Loads the object identified by the ``obj`` form parameter, marks it as
    new, and passes it to :meth:`Protocol.receive` on the protocol class
    registered in ``PROTOCOLS`` under the object's ``source_protocol``.

    Parameters:
      obj (url-safe google.cloud.ndb.key.Key): :class:`models.Object` to handle
      authed_as (str): passed to :meth:`Protocol.receive`

    Returns:
      whatever :meth:`Protocol.receive` returns. If it raises
      :class:`ValueError`, the exception is logged and reported via
      :func:`common.error` with status 304.

    TODO: migrate incoming webmentions and AP inbox deliveries to this. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention` and :func:`activitypub.inbox`, eg
    :class:`models.Object` with ``new`` and ``changed``, HTTP request details,
    etc. See stash for attempt at this for :class:`web.Web`.
    """
    params = request.form.to_dict()
    logger.info(f'Params: {list(params.items())}')

    received = ndb.Key(urlsafe=params['obj']).get()
    assert received
    received.new = True

    authed_as = params.get('authed_as')

    # a request authed as one of our own domains counts as internal
    if authed_as == common.PRIMARY_DOMAIN:
        internal = True
    else:
        internal = authed_as in common.PROTOCOL_DOMAINS

    try:
        proto = PROTOCOLS[received.source_protocol]
        return proto.receive(obj=received, authed_as=authed_as,
                             internal=internal)
    except ValueError as err:
        logger.warning(err, exc_info=True)
        error(err, status=304)
1556

1557

1558
@cloud_tasks_only
def send_task():
    """Task handler for sending an activity to a single specific destination.

    Calls :meth:`Protocol.send` with the form parameters, then records the
    outcome on the stored :class:`models.Object` inside a transaction: the
    target is removed from ``undelivered`` and added to ``failed`` if no
    result was obtained (``sent`` is still ``None``, eg because sending
    raised), or to ``delivered`` if the send returned a truthy value. Once no
    undelivered targets remain, ``status`` is set to ``complete``, ``failed``
    or ``ignored``.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj (url-safe google.cloud.ndb.key.Key): :class:`models.Object` to send
      orig_obj (url-safe google.cloud.ndb.key.Key): optional "original object"
        :class:`models.Object` that this object refers to, eg replies to or
        reposts or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from

    Returns:
      (str, int) tuple: response body and HTTP status. 204 if ``url`` or
      ``protocol`` is missing, or if the target is in neither ``undelivered``
      nor ``failed`` and ``force`` wasn't passed; otherwise 200 if the send
      succeeded, 304 if not.
    """
    params = request.form.to_dict()
    logger.info(f'Params: {list(params.items())}')

    # prepare
    url = params.get('url')
    protocol = params.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    target = Target(uri=url, protocol=protocol)

    obj = ndb.Key(urlsafe=params['obj']).get()
    # only send to targets that are still pending, unless explicitly forced
    pending = target in obj.undelivered or target in obj.failed
    if not pending and 'force' not in request.values:
        logger.info(f"{url} not in {obj.key.id()} undelivered or failed, giving up")
        return r'¯\_(ツ)_/¯', 204

    user_key = params.get('user')
    user = ndb.Key(urlsafe=user_key).get() if user_key else None

    orig_obj = None
    if orig_key := params.get('orig_obj'):
        orig_obj = ndb.Key(urlsafe=orig_key).get()

    # send
    logger.info(f'Sending {obj.key.id()} AS1: {json_dumps(obj.as1, indent=2)}')
    sent = None
    try:
        sent = PROTOCOLS[protocol].send(obj, url, from_user=user, orig_obj=orig_obj)
    except BaseException as err:
        # sent stays None, so the target gets recorded as failed below
        code, body = util.interpret_http_exception(err)
        if not (code or body):
            logger.info(str(err), exc_info=True)

    if sent is False:
        logger.info(f'Failed sending {obj.key.id()} to {url}')

    # write results to Object
    @ndb.transactional()
    def record_result(obj_key):
        # re-fetch inside the transaction instead of reusing the outer obj
        stored = obj_key.get()
        if target in stored.undelivered:
            stored.remove('undelivered', target)

        if sent is not None:
            if target in stored.failed:
                stored.remove('failed', target)
            if sent:
                stored.add('delivered', target)
        else:
            stored.add('failed', target)

        if not stored.undelivered:
            if stored.delivered:
                stored.status = 'complete'
            elif stored.failed:
                stored.status = 'failed'
            else:
                stored.status = 'ignored'
        stored.put()

    record_result(obj.key)

    status = 200 if sent else 304
    return '', status
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc