• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 56f00275-79dc-46de-a2ee-e39c2d344a4f

03 Jun 2024 11:21PM UTC coverage: 94.007% (-0.02%) from 94.03%
56f00275-79dc-46de-a2ee-e39c2d344a4f

push

circleci

snarfed
drop Object cache in Protocol.load

4 of 4 new or added lines in 1 file covered. (100.0%)

66 existing lines in 3 files now uncovered.

3733 of 3971 relevant lines covered (94.01%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.51
/protocol.py
1
"""Base protocol class and common code."""
2
import copy
1✔
3
from datetime import timedelta
1✔
4
import logging
1✔
5
import re
1✔
6
from threading import Lock
1✔
7
from urllib.parse import urljoin, urlparse
1✔
8

9
from cachetools import cached, LRUCache
1✔
10
from flask import request
1✔
11
from google.cloud import ndb
1✔
12
from google.cloud.ndb import OR
1✔
13
from google.cloud.ndb.model import _entity_to_protobuf
1✔
14
from granary import as1, as2
1✔
15
from granary.source import html_to_text
1✔
16
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
17
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
18
from oauth_dropins.webutil import models
1✔
19
from oauth_dropins.webutil import util
1✔
20
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
21
import werkzeug.exceptions
1✔
22

23
import common
1✔
24
from common import (
1✔
25
    add,
26
    DOMAIN_BLOCKLIST,
27
    DOMAIN_RE,
28
    DOMAINS,
29
    error,
30
    PRIMARY_DOMAIN,
31
    PROTOCOL_DOMAINS,
32
    report_error,
33
    subdomain_wrap,
34
)
35
from ids import (
1✔
36
    BOT_ACTOR_AP_IDS,
37
    normalize_user_id,
38
    translate_object_id,
39
    translate_user_id,
40
)
41
from models import Follower, get_originals, Object, PROTOCOLS, Target, User
1✔
42

43
# AS1 types/verbs that Protocol.receive handles. Activities of any other type
# are rejected there with an HTTP 501.
SUPPORTED_TYPES = (
    'accept',
    'article',
    'audio',
    'block',
    'comment',
    'delete',
    'follow',
    'image',
    'like',
    'note',
    'post',
    'share',
    'stop-following',
    'undo',
    'update',
    'video',
)

# NOTE(review): presumably how old a stored Object may get before it is
# re-fetched; not referenced in the visible part of this file - confirm.
OBJECT_REFRESH_AGE = timedelta(days=30)

# require a follow for users on these domains before we deliver anything from
# them other than their profile
LIMITED_DOMAINS = util.load_file_lines('limited_domains')

# activity ids that we've already handled and can now ignore.
# used in Protocol.receive
seen_ids = LRUCache(100000)
# held by Protocol.receive around its check-and-set on seen_ids
seen_ids_lock = Lock()

logger = logging.getLogger(__name__)
1✔
74

75

76
class Protocol:
    """Base protocol class. Not to be instantiated; classmethods only.

    Attributes:
      LABEL (str): human-readable lower case name
      OTHER_LABELS (list of str): label aliases
      ABBREV (str): lower case abbreviation, used in URL paths
      PHRASE (str): human-readable name or phrase. Used in phrases like
        ``Follow this person on {PHRASE}``
      LOGO_HTML (str): logo emoji or ``<img>`` tag
      CONTENT_TYPE (str): MIME type of this protocol's native data format,
        appropriate for the ``Content-Type`` HTTP header.
      HAS_FOLLOW_ACCEPTS (bool): whether this protocol supports explicit
        accept/reject activities in response to follows, eg ActivityPub
      HAS_COPIES (bool): whether this protocol is push and needs us to
        proactively create "copy" users and objects, as opposed to pulling
        converted objects on demand
      REQUIRES_AVATAR (bool): whether accounts on this protocol are required
        to have a profile picture. If they don't, their ``User.status`` will be
        ``blocked``.
      REQUIRES_NAME (bool): whether accounts on this protocol are required to
        have a profile name that's different than their handle or id. If they
        don't, their ``User.status`` will be ``blocked``.
      REQUIRES_OLD_ACCOUNT (bool): whether accounts on this protocol are
        required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their
        profile includes creation date and it's not old enough, their
        ``User.status`` will be ``blocked``.
      DEFAULT_ENABLED_PROTOCOLS (list of str): labels of other protocols that
        are automatically enabled for this protocol to bridge into
    """
    # defaults; concrete protocol subclasses are expected to override these
    ABBREV = None
    PHRASE = None
    OTHER_LABELS = ()
    LOGO_HTML = ''
    CONTENT_TYPE = None
    HAS_FOLLOW_ACCEPTS = False
    HAS_COPIES = False
    REQUIRES_AVATAR = False
    REQUIRES_NAME = False
    REQUIRES_OLD_ACCOUNT = False
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
117

118
    def __init__(self):
1✔
UNCOV
119
        assert False
×
120

121
    @classmethod
    @property
    def LABEL(cls):
        """str: this protocol's label, ie its class name in lower case."""
        # NOTE(review): wrapping a property in classmethod is deprecated as of
        # Python 3.11 and removed in 3.13; revisit before upgrading Python.
        return cls.__name__.lower()
1✔
125

126
    @staticmethod
    def for_request(fed=None):
        """Looks up the protocol that the current Flask request is addressed to.

        Passes the request's host to :meth:`for_bridgy_subdomain`.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request's host is not a subdomain
          of ``brid.gy`` or isn't a known protocol
        """
        host = request.host
        return Protocol.for_bridgy_subdomain(host, fed=fed)
1✔
141

142
    @staticmethod
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Maps one of our own domains or URLs to the protocol it serves.

        Args:
          domain_or_url (str): bare domain, or web URL whose domain is used
          fed (str or protocol.Protocol): label or class to return when the
            domain is :const:`common.PRIMARY_DOMAIN` or one of
            :const:`common.LOCAL_DOMAINS`. A label is looked up in
            ``PROTOCOLS``.

        Returns:
          class: :class:`Protocol` subclass, or None if the domain doesn't end
          with :const:`common.SUPERDOMAIN` or its prefix isn't a known
          protocol label
        """
        if util.is_web(domain_or_url):
            domain = util.domain_from_link(domain_or_url, minimize=False)
        else:
            domain = domain_or_url

        # primary and local domains aren't protocol-specific; defer to fed
        if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        if not domain or not domain.endswith(common.SUPERDOMAIN):
            return None

        # what's left in front of the superdomain is the protocol label
        return PROTOCOLS.get(domain.removesuffix(common.SUPERDOMAIN))
1✔
165

166
    @classmethod
    def owns_id(cls, id):
        """Returns whether this protocol owns the id, or None if it's unclear.

        To be implemented by subclasses. This base implementation always
        returns False.

        IDs are string identities that uniquely identify users, and are intended
        primarily to be machine readable and usable. Compare to handles, which
        are human-chosen, human-meaningful, and often but not always unique.

        Some protocols' ids are more or less deterministic based on the id
        format, eg AT Protocol owns ``at://`` URIs. Others, like http(s) URLs,
        could be owned by eg Web or ActivityPub.

        This should be a quick guess without expensive side effects, eg no
        external HTTP fetches to fetch the id itself or otherwise perform
        discovery.

        Implementations should return False if the id's domain is in
        :const:`common.DOMAIN_BLOCKLIST`.

        Args:
          id (str)

        Returns:
          bool or None: True or False if this protocol definitely does or
          doesn't own the id, None if it can't tell
        """
        return False
1✔
193

194
    @classmethod
    def owns_handle(cls, handle, allow_internal=False):
        """Returns whether this protocol owns the handle, or None if it's unclear.

        To be implemented by subclasses. This base implementation always
        returns False and ignores ``allow_internal``.

        Handles are string identities that are human-chosen, human-meaningful,
        and often but not always unique. Compare to IDs, which uniquely identify
        users, and are intended primarily to be machine readable and usable.

        Some protocols' handles are more or less deterministic based on the id
        format, eg ActivityPub (technically WebFinger) handles are
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
        ActivityPub, AT Protocol, or others.

        This should be a quick guess without expensive side effects, eg no
        external HTTP fetches to fetch the id itself or otherwise perform
        discovery.

        Args:
          handle (str)
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc.
            NOTE(review): exact semantics are up to subclasses; confirm there.

        Returns:
          bool or None: True or False if this protocol definitely does or
          doesn't own the handle, None if it can't tell
        """
        return False
1✔
222

223
    @classmethod
    def handle_to_id(cls, handle):
        """Converts a handle to an id.

        To be implemented by subclasses. This base implementation always
        raises.

        May incur network requests, eg DNS queries or HTTP requests.

        Args:
          handle (str)

        Returns:
          str: corresponding id, or None if the handle can't be found

        Raises:
          NotImplementedError: if the subclass doesn't override this method
        """
        raise NotImplementedError()
×
238

239
    @classmethod
    def key_for(cls, id):
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

        If called as ``Protocol.key_for``, infers the protocol with
        :meth:`for_id` and delegates to it. If called on a concrete subclass,
        uses that subclass as is: returns the stored user's key if one exists,
        otherwise the key of a new, unsaved user with this id.

        Args:
          id (str)

        Returns:
          google.cloud.ndb.Key: matching key, or None if no protocol could be
          inferred for the id, or if the stored user has a ``status`` set
        """
        if cls == Protocol:
            inferred = Protocol.for_id(id)
            if not inferred:
                return None
            return inferred.key_for(id)

        # load the stored user, if any, so that we follow use_instead
        user = cls.get_by_id(id, allow_opt_out=True)
        if not user:
            return cls(id=id).key

        # users with a status set don't get a key
        return None if user.status else user.key
1✔
265

266
    @cached(LRUCache(20000), lock=Lock())
    @staticmethod
    def for_id(id, remote=True):
        """Returns the protocol for a given id.

        Tries, in order:

        1. our own per-protocol subdomains, via :meth:`for_bridgy_subdomain`
        2. each known protocol's :meth:`owns_id`
        3. the ``source_protocol`` of an already stored :class:`Object`
        4. if ``remote`` is True, loading the id over the network with each
           protocol that couldn't rule it out in step 2

        Return values, including None, are memoized in a 20000 entry LRU cache
        keyed on the arguments.

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.info(f'Determining protocol for id {id}')
        if not id:
            return None

        if util.is_web(id):
            # step 1: check for our per-protocol subdomains
            is_homepage = urlparse(id).path.strip('/') == ''
            by_subdomain = Protocol.for_bridgy_subdomain(id)
            if by_subdomain and not is_homepage and id not in BOT_ACTOR_AP_IDS:
                logger.info(f'  {by_subdomain.LABEL} owns id {id}')
                return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it
        # sort to be deterministic
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
                           key=lambda p: p.LABEL)
        candidates = []
        for protocol in protocols:
            owns = protocol.owns_id(id)
            if owns:
                logger.info(f'  {protocol.LABEL} owns id {id}')
                return protocol
            elif owns is not False:
                # None, ie unclear; keep as a candidate for the later steps
                candidates.append(protocol)

        if len(candidates) == 1:
            logger.info(f'  {candidates[0].LABEL} owns id {id}')
            return candidates[0]

        # step 3: look for existing Objects in the datastore
        obj = Protocol.load(id, remote=False)
        if obj and obj.source_protocol:
            logger.info(f'  {obj.key} owned by source_protocol {obj.source_protocol}')
            return PROTOCOLS[obj.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for protocol in candidates:
            logger.info(f'Trying {protocol.LABEL}')
            try:
                if protocol.load(id, local=False, remote=True):
                    logger.info(f'  {protocol.LABEL} owns id {id}')
                    return protocol
            except werkzeug.exceptions.BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except werkzeug.exceptions.HTTPException as e:
                # internal error we generated ourselves; try next protocol.
                # log it so that the failure isn't swallowed silently
                logger.info(f'  {protocol.LABEL} failed to load {id}: {e}')
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
1✔
341

342
    @staticmethod
    def for_handle(handle):
        """Determines which protocol a handle belongs to.

        Asks each known protocol's :meth:`owns_handle` first, then looks for a
        stored user with this handle, then falls back to resolving the handle
        with :meth:`handle_to_id`, which may hit the network.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle
          or the matching stored user has a ``status`` set
        """
        # TODO: normalize, eg convert domains to lower case
        logger.info(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: check if any Protocols say conclusively that they own it.
        # sort to be deterministic.
        by_label = sorted({p for p in PROTOCOLS.values() if p},
                          key=lambda p: p.LABEL)
        undecided = []
        for proto in by_label:
            verdict = proto.owns_handle(handle)
            if verdict:
                logger.info(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)
            if verdict is not False:
                undecided.append(proto)

        if len(undecided) == 1:
            logger.info(f'  {undecided[0].LABEL} owns handle {handle}')
            return (undecided[0], None)

        # step 2: look for matching User in the datastore
        for proto in undecided:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue
            if user.status:
                return (None, None)
            logger.info(f'  user {user.key} owns handle {handle}')
            return (proto, user.key.id())

        # step 3: resolve handle to id
        for proto in undecided:
            resolved = proto.handle_to_id(handle)
            if resolved:
                logger.info(f'  {proto.LABEL} resolved handle {handle} to id {resolved}')
                return (proto, resolved)

        return (None, None)
1✔
395

396
    @classmethod
    def bridged_web_url_for(cls, user):
        """Returns the web URL for a user's bridged profile in this protocol.

        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``

        This base implementation always returns None; subclasses may override.

        Args:
          user (models.User)

        Returns:
          str, or None if there isn't a canonical URL
        """
        return None
1✔
410

411
    @classmethod
    def actor_key(cls, obj):
        """Returns the :class:`User` key for a given object's author or actor.

        The owner id comes from :func:`granary.as1.get_owner` and is converted
        to a key with :meth:`key_for`.

        Args:
          obj (models.Object)

        Returns:
          google.cloud.ndb.key.Key or None: None if the object has no owner
        """
        owner = as1.get_owner(obj.as1)
        return cls.key_for(owner) if owner else None
1✔
424

425
    @classmethod
    def bot_user_id(cls):
        """Returns the Web user id for the bot user for this protocol.

        This is :attr:`ABBREV` followed by :const:`common.SUPERDOMAIN`, eg
        ``'bsky.brid.gy'`` for ATProto.

        Returns:
          str:
        """
        return '{}{}'.format(cls.ABBREV, common.SUPERDOMAIN)
1✔
435

436
    @classmethod
    def create_for(cls, user):
        """Creates a copy user in this protocol.

        To be implemented by subclasses; this base implementation always
        raises. Implementations should add the copy user to :attr:`copies`.

        Args:
          user (models.User): original source user. Shouldn't already have a
            copy user for this protocol in :attr:`copies`.

        Raises:
          ValueError: if we can't create a copy of the given user in this protocol
          NotImplementedError: if the subclass doesn't override this method
        """
        raise NotImplementedError()
×
450

451
    @classmethod
    def send(to_cls, obj, url, from_user=None, orig_obj=None):
        """Sends an outgoing activity.

        To be implemented by subclasses; this base implementation always
        raises.

        Args:
          obj (models.Object): with activity to send
          url (str): destination URL to send to
          from_user (models.User): user (actor) this activity is from
          orig_obj (models.Object): the "original object" that this object
            refers to, eg replies to or reposts or likes

        Returns:
          bool: True if the activity is sent successfully, False if it is
          ignored or otherwise unsent due to protocol logic, eg no webmention
          endpoint, protocol doesn't support the activity type. (Failures are
          raised as exceptions.)

        Raises:
          werkzeug.HTTPException: if the request fails
          NotImplementedError: if the subclass doesn't override this method
        """
        raise NotImplementedError()
×
474

475
    @classmethod
    def fetch(cls, obj, **kwargs):
        """Fetches a protocol-specific object and populates it in an :class:`Object`.

        Errors are raised as exceptions. If this method returns False, the fetch
        didn't fail but didn't succeed either, eg the id isn't valid for this
        protocol, or the fetch didn't return valid data for this protocol.

        To be implemented by subclasses; this base implementation always
        raises.

        Args:
          obj (models.Object): with the id to fetch. Data is filled into one of
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
          kwargs: subclass-specific

        Returns:
          bool: True if the object was fetched and populated successfully,
          False otherwise

        Raises:
          werkzeug.HTTPException: if the fetch fails
          NotImplementedError: if the subclass doesn't override this method
        """
        raise NotImplementedError()
×
498

499
    @classmethod
    def convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
        and ``application/activity+json`` for :class:`ActivityPub`.

        Does the protocol-independent preprocessing, then hands off to
        :meth:`_convert`. Specifically, when ``obj`` is an actor that is being
        bridged from another protocol, this sets ``obj.our_as1`` to a copy of
        its AS1 with ``objectType`` ``application`` and a "bridged by Bridgy
        Fed" disclaimer appended to its ``summary``.

        Args:
          obj (models.Object): may be modified, see above
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific, passed through to :meth:`_convert`

        Returns:
          converted object in the protocol's native format, often a dict
        """
        obj_id = None
        if obj:
            if obj.key:
                obj_id = obj.key.id()
            elif obj.as1:
                obj_id = obj.as1.get('id')

        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
        needs_label = (
            from_user and obj and obj.as1
            and obj.as1.get('objectType') in as1.ACTOR_TYPES
            and PROTOCOLS.get(obj.source_protocol) != cls
            # NOTE(review): for_bridgy_subdomain returns a protocol class or
            # None. If DOMAINS holds domain strings, this membership test can
            # never be False - confirm what was intended here.
            and Protocol.for_bridgy_subdomain(obj_id) not in DOMAINS
            and 'Bridgy Fed]' not in html_to_text(obj.as1.get('summary', ''))
            # Web users are special cased, they don't get the label if they've
            # explicitly enabled Bridgy Fed with redirects or webmentions
            and not (from_user.LABEL == 'web'
                     and (from_user.last_webmention_in or from_user.has_redirects)))

        if needs_label:
            obj.our_as1 = copy.deepcopy(obj.as1)
            obj.our_as1['objectType'] = 'application'

            if from_user.key and obj_id == from_user.profile_id():
                # this is from_user's own profile; link to their user page
                disclaimer = f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a> from <a href="{from_user.web_url()}">{from_user.handle_or_id()}</a> by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
            else:
                # some other actor; describe it with whatever we have
                url = as1.get_url(obj.our_as1) or obj_id
                name = obj.our_as1.get('displayName') or obj.our_as1.get('username')
                if url:
                    source = util.pretty_link(url, text=name)
                else:
                    source = name or ''
                if source:
                    source = ' from ' + source
                disclaimer = f'[bridged{source} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'

            summary = obj.our_as1.setdefault('summary', '')
            if not summary.endswith(disclaimer):
                separator = '<br><br>' if summary else ''
                obj.our_as1['summary'] = summary + separator + disclaimer

        return cls._convert(obj, from_user=from_user, **kwargs)
1✔
554

555
    @classmethod
    def _convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        To be implemented by subclasses; this base implementation always
        raises. Implementations should generally call
        :meth:`Protocol.translate_ids` (as their own class) before converting to
        their format.

        Called by :meth:`convert` after its protocol-independent preprocessing.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific

        Returns:
          converted object in the protocol's native format, often a dict. May
            return the ``{}`` empty dict if the object can't be converted.

        Raises:
          NotImplementedError: if the subclass doesn't override this method
        """
        raise NotImplementedError()
×
573

574
    @classmethod
    def target_for(cls, obj, shared=False):
        """Returns an :class:`Object`'s delivery target (endpoint).

        To be implemented by subclasses; this base implementation always
        raises.

        Examples:

        * If obj has ``source_protocol`` ``web``, returns its URL, as a
          webmention target.
        * If obj is an ``activitypub`` actor, returns its inbox.
        * If obj is an ``activitypub`` object, returns its author's or actor's
          inbox.

        Args:
          obj (models.Object):
          shared (bool): optional. If True, returns a common/shared
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
            multiple recipients for efficiency

        Returns:
          str: target endpoint, or None if not available.

        Raises:
          NotImplementedError: if the subclass doesn't override this method
        """
        raise NotImplementedError()
×
598

599
    @classmethod
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        A URL is blocked if its domain, or any parent of its domain, is in
        :const:`common.DOMAIN_BLOCKLIST`, or, unless ``allow_internal`` is
        set, in :const:`common.DOMAINS`.

        Default implementation here, subclasses may override.

        Args:
          url (str):
          allow_internal (bool): if True, our own internal domains like
            ``fed.brid.gy``, ``bsky.brid.gy``, etc are not blocked

        Returns:
          bool:
        """
        # build a new tuple instead of using += on the module-level constant,
        # which would extend it in place for every later call if it's a list
        blocklist = tuple(DOMAIN_BLOCKLIST)
        if not allow_internal:
            blocklist += tuple(DOMAINS)
        return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
1✔
616

617
    @classmethod
    def translate_ids(to_cls, obj):
        """Translates all ids in an AS1 object to a specific protocol.

        Infers source protocol for each id value separately, with
        :meth:`for_id`. Ids whose protocol can't be inferred, or is already
        ``to_cls``, are left alone.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``tags.[objectType=mention].url``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        Args:
          obj (dict): AS1 object or activity (not :class:`models.Object`!).
            Not modified; a deep copy is translated instead.

        Returns:
          dict: wrapped version of ``obj``
        """
        assert to_cls != Protocol
        if not obj:
            return obj

        outer_obj = copy.deepcopy(obj)
        inner_obj = as1.get_object(outer_obj)
        outer_obj['object'] = inner_obj

        def translate(elem, field, fn):
            """Translates the id in ``elem[field]``, in place, using ``fn``."""
            value = as1.get_object(elem, field)
            elem[field] = value
            orig_id = value.get('id')
            if orig_id:
                from_cls = Protocol.for_id(orig_id)
                # TODO: what if from_cls is None? relax translate_object_id,
                # make it a noop if we don't know enough about from/to?
                if from_cls and from_cls != to_cls:
                    value['id'] = fn(id=orig_id, from_=from_cls, to=to_cls)
            # collapse objects that only have an id back down to the bare id
            if value.keys() == {'id'}:
                elem[field] = value['id']

        outer_type = as1.object_type(outer_obj)
        if outer_type in as1.ACTOR_TYPES:
            translate(outer_obj, 'id', translate_user_id)
        else:
            translate(outer_obj, 'id', translate_object_id)

        # the object of a follow or unfollow is always a user
        inner_is_actor = (as1.object_type(inner_obj) in as1.ACTOR_TYPES
                          or outer_type in ('follow', 'stop-following'))
        if inner_is_actor:
            translate(inner_obj, 'id', translate_user_id)
        else:
            translate(inner_obj, 'id', translate_object_id)

        for o in (outer_obj, inner_obj):
            translate(o, 'inReplyTo', translate_object_id)
            translate(o, 'actor', translate_user_id)
            translate(o, 'author', translate_user_id)
            for tag in as1.get_objects(o, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', translate_user_id)

        outer_obj = util.trim_nulls(outer_obj)
        if outer_obj.get('object', {}).keys() == {'id'}:
            outer_obj['object'] = inner_obj['id']

        return outer_obj
1✔
693

694
    @classmethod
1✔
695
    def receive(from_cls, obj, authed_as=None, internal=False):
1✔
696
        """Handles an incoming activity.
697

698
        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
699
        unset, raises :class:`werkzeug.exceptions.BadRequest`.
700

701
        Args:
702
          obj (models.Object)
703
          authed_as (str): authenticated actor id who sent this activity
704
          internal (bool): whether to allow activity ids on internal domains
705

706
        Returns:
707
          (str, int) tuple: (response body, HTTP status code) Flask response
708

709
        Raises:
710
          werkzeug.HTTPException: if the request is invalid
711
        """
712
        # check some invariants
713
        assert from_cls != Protocol
1✔
714
        assert isinstance(obj, Object), obj
1✔
715
        logger.info(f'From {from_cls.LABEL}: {obj.key} AS1: {json_dumps(obj.as1, indent=2)}')
1✔
716

717
        if not obj.as1:
1✔
UNCOV
718
            error('No object data provided')
×
719

720
        id = None
1✔
721
        if obj.key and obj.key.id():
1✔
722
            id = obj.key.id()
1✔
723

724
        if not id:
1✔
725
            id = obj.as1.get('id')
1✔
726
            obj.key = ndb.Key(Object, id)
1✔
727

728
        if not id:
1✔
UNCOV
729
            error('No id provided')
×
730
        elif from_cls.is_blocklisted(id, allow_internal=internal):
1✔
731
            error(f'Activity {id} is blocklisted')
1✔
732

733
        # short circuit if we've already seen this activity id.
734
        # (don't do this for bare objects since we need to check further down
735
        # whether they've been updated since we saw them last.)
736
        if obj.as1.get('objectType') == 'activity' and 'force' not in request.values:
1✔
737
            with seen_ids_lock:
1✔
738
                already_seen = id in seen_ids
1✔
739
                seen_ids[id] = True
1✔
740

741
            if (already_seen
1✔
742
                    or (obj.new is False and obj.changed is False)
743
                    or (obj.new is None and obj.changed is None
744
                        and from_cls.load(id, remote=False))):
745
                msg = f'Already handled this activity {id}'
1✔
746
                logger.info(msg)
1✔
747
                return msg, 204
1✔
748

749
        # load actor user, check authorization
750
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
751
        actor = as1.get_owner(obj.as1)
1✔
752
        if not actor:
1✔
753
            error('Activity missing actor or author', status=400)
1✔
754
        elif from_cls.owns_id(actor) is False:
1✔
755
            error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)
1✔
756

757
        assert authed_as
1✔
758
        assert isinstance(authed_as, str)
1✔
759
        authed_as = normalize_user_id(id=authed_as, proto=from_cls)
1✔
760
        actor = normalize_user_id(id=actor, proto=from_cls)
1✔
761
        if actor != authed_as:
1✔
762
            msg = f"actor {actor} isn't authed user {authed_as}"
1✔
763
            report_error(msg)
1✔
764
            error(msg, status=403)
1✔
765

766
        # update copy ids to originals
767
        obj.normalize_ids()
1✔
768
        obj.resolve_ids()
1✔
769

770
        if (obj.type == 'follow'
1✔
771
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
772
            # refresh profile for follows of bot users to re-check whether
773
            # they're opted out
774
            logger.info(f'Follow of bot user, reloading {actor}')
1✔
775
            from_cls.load(actor, remote=True)
1✔
776
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
1✔
777
        else:
778
            # load actor user
779
            from_user = from_cls.get_or_create(id=actor)
1✔
780

781
        if not from_user or from_user.manual_opt_out:
1✔
782
            error(f'Actor {actor} is opted out or blocked', status=204)
1✔
783

784
        # write Object to datastore
785
        orig = obj
1✔
786
        obj = Object.get_or_create(id, authed_as=actor, **orig.to_dict())
1✔
787
        if orig.new is not None:
1✔
788
            obj.new = orig.new
1✔
789
        if orig.changed is not None:
1✔
790
            obj.changed = orig.changed
1✔
791

792
        # if this is a post, ie not an activity, wrap it in a create or update
793
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
1✔
794
        obj.add('users', from_user.key)
1✔
795

796
        if obj.type not in SUPPORTED_TYPES:
1✔
797
            error(f'Sorry, {obj.type} activities are not supported yet.', status=501)
1✔
798

799
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
800
        if obj.as1.get('verb') in ('post', 'update', 'delete'):
1✔
801
            if inner_owner := as1.get_owner(inner_obj_as1):
1✔
802
                if inner_owner_key := from_cls.key_for(inner_owner):
1✔
803
                    obj.add('users', inner_owner_key)
1✔
804

805
        obj.source_protocol = from_cls.LABEL
1✔
806
        obj.put()
1✔
807

808
        # store inner object
809
        inner_obj_id = inner_obj_as1.get('id')
1✔
810
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
1✔
811
            Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
1✔
812
                                 source_protocol=from_cls.LABEL, authed_as=actor)
813

814
        actor = as1.get_object(obj.as1, 'actor')
1✔
815
        actor_id = actor.get('id')
1✔
816

817
        # handle activity!
818

819
        # accept, eg in response to a follow. only send if the destination
820
        # supports accepts.
821
        if obj.type == 'accept':
1✔
822
            to_cls = Protocol.for_id(inner_obj_id)
1✔
823
            if not to_cls or not to_cls.HAS_FOLLOW_ACCEPTS:
1✔
824
                return 'OK'  # noop
1✔
825

826
        elif obj.type == 'stop-following':
1✔
827
            # TODO: unify with handle_follow?
828
            # TODO: handle multiple followees
829
            if not actor_id or not inner_obj_id:
1✔
UNCOV
830
                error(f'Undo of Follow requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
×
831

832
            # deactivate Follower
833
            from_ = from_cls.key_for(actor_id)
1✔
834
            to_cls = Protocol.for_id(inner_obj_id)
1✔
835
            to = to_cls.key_for(inner_obj_id)
1✔
836
            follower = Follower.query(Follower.to == to,
1✔
837
                                      Follower.from_ == from_,
838
                                      Follower.status == 'active').get()
839
            if follower:
1✔
840
                logger.info(f'Marking {follower} inactive')
1✔
841
                follower.status = 'inactive'
1✔
842
                follower.put()
1✔
843
            else:
844
                logger.warning(f'No Follower found for {from_} => {to}')
1✔
845

846
            # fall through to deliver to followee
847
            # TODO: do we convert stop-following to webmention 410 of original
848
            # follow?
849

850
        elif obj.type in ('update', 'like', 'share'):  # require object
1✔
851
            if not inner_obj_id:
1✔
852
                error("Couldn't find id of object to update")
1✔
853

854
            # fall through to deliver to followers
855

856
        elif obj.type == 'delete':
1✔
857
            if not inner_obj_id:
1✔
UNCOV
858
                error("Couldn't find id of object to delete")
×
859

860
            logger.info(f'Marking Object {inner_obj_id} deleted')
1✔
861
            Object.get_or_create(inner_obj_id, deleted=True, authed_as=authed_as)
1✔
862

863
            # if this is an actor, deactivate its followers/followings
864
            # https://github.com/snarfed/bridgy-fed/issues/63
865
            deleted_user = from_cls.key_for(id=inner_obj_id)
1✔
866
            if deleted_user:
1✔
867
                logger.info(f'Deactivating Followers from or to = {inner_obj_id}')
1✔
868
                followers = Follower.query(OR(Follower.to == deleted_user,
1✔
869
                                              Follower.from_ == deleted_user)
870
                                           ).fetch()
871
                for f in followers:
1✔
872
                    f.status = 'inactive'
1✔
873
                ndb.put_multi(followers)
1✔
874

875
            # fall through to deliver to followers
876

877
        elif obj.type == 'block':
1✔
878
            proto = Protocol.for_bridgy_subdomain(inner_obj_id)
1✔
879
            if not proto:
1✔
UNCOV
880
                logger.info("Ignoring block, target isn't one of our protocol domains")
×
UNCOV
881
                return 'OK', 200
×
882

883
            from_user.disable_protocol(proto)
1✔
884
            proto.maybe_delete_copy(from_user)
1✔
885
            return 'OK', 200
1✔
886

887
        elif obj.type == 'post':
1✔
888
            to_cc = (as1.get_ids(inner_obj_as1, 'to')
1✔
889
                     + as1.get_ids(inner_obj_as1, 'cc'))
890
            if len(to_cc) == 1 and to_cc != [as2.PUBLIC_AUDIENCE]:
1✔
891
                proto = Protocol.for_bridgy_subdomain(to_cc[0])
1✔
892
                if proto:
1✔
893
                    # remove @-mentions of bot user in HTML links
894
                    soup = util.parse_html(inner_obj_as1.get('content', ''))
1✔
895
                    for link in soup.find_all('a'):
1✔
896
                        link.extract()
1✔
897
                    content = soup.get_text().strip().lower()
1✔
898
                    logger.info(f'got DM to {to_cc}: {content}')
1✔
899
                    if content in ('yes', 'ok'):
1✔
900
                        from_user.enable_protocol(proto)
1✔
901
                        proto.bot_follow(from_user)
1✔
902
                    elif content == 'no':
1✔
903
                        from_user.disable_protocol(proto)
1✔
904
                        proto.maybe_delete_copy(from_user)
1✔
905
                    return 'OK', 200
1✔
906

907
        # fetch actor if necessary
908
        if actor and actor.keys() == set(['id']):
1✔
909
            logger.info('Fetching actor so we have name, profile photo, etc')
1✔
910
            actor_obj = from_cls.load(actor['id'])
1✔
911
            if actor_obj and actor_obj.as1:
1✔
912
                obj.our_as1 = {**obj.as1, 'actor': actor_obj.as1}
1✔
913

914
        # fetch object if necessary so we can render it in feeds
915
        if (obj.type == 'share'
1✔
916
                and inner_obj_as1.keys() == set(['id'])
917
                and from_cls.owns_id(inner_obj_id)):
918
            logger.info('Fetching object so we can render it in feeds')
1✔
919
            inner_obj = from_cls.load(inner_obj_id)
1✔
920
            if inner_obj and inner_obj.as1:
1✔
921
                obj.our_as1 = {
1✔
922
                    **obj.as1,
923
                    'object': {
924
                        **inner_obj_as1,
925
                        **inner_obj.as1,
926
                    }
927
                }
928

929
        if obj.type == 'follow':
1✔
930
            proto = Protocol.for_bridgy_subdomain(inner_obj_id)
1✔
931
            if proto:
1✔
932
                # follow of one of our protocol bot users; enable that protocol.
933
                # fall through so that we send an accept.
934
                from_user.enable_protocol(proto)
1✔
935
                proto.bot_follow(from_user)
1✔
936

937
            from_cls.handle_follow(obj)
1✔
938

939
        # deliver to targets
940
        return from_cls.deliver(obj, from_user=from_user)
1✔
941

942
    @classmethod
    def handle_follow(from_cls, obj):
        """Handles an incoming follow activity.

        Loads the follower and each followee, stores a :class:`Follower` for
        each followee, and calls :meth:`maybe_accept_follow` for each one.
        Doesn't send the ``Follow`` itself; that happens in :meth:`deliver`.

        Side effects visible here: sets ``obj.users`` to the follower's key and
        adds each followee's key to ``obj``'s ``notify`` property.

        NOTE(review): the code after each ``error(...)`` call assumes that
        ``error`` aborts the request (ie raises) - confirm in ``common``.

        Args:
          obj (models.Object): follow activity
        """
        logger.info('Got follow. Loading users, storing Follow(s), sending accept(s)')

        # Prepare follower (from) users' data
        from_as1 = as1.get_object(obj.as1, 'actor')
        from_id = from_as1.get('id')
        if not from_id:
            error(f'Follow activity requires actor. Got: {obj.as1}')

        from_obj = from_cls.load(from_id)
        if not from_obj:
            error(f"Couldn't load {from_id}")

        if not from_obj.as1:
            # we have nothing stored for the follower; fall back to whatever
            # actor data was inlined in the follow activity
            from_obj.our_as1 = from_as1
            from_obj.put()

        from_key = from_cls.key_for(from_id)
        if not from_key:
            error(f'Invalid {from_cls} user key: {from_id}')
        obj.users = [from_key]
        from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls and from_cls.LABEL != 'fake':
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_obj = to_cls.load(to_id)
            if to_obj and not to_obj.as1:
                to_obj.our_as1 = to_as1
                to_obj.put()

            to_key = to_cls.key_for(to_id)
            if not to_key:
                # report the followee (to) side here; it's the key that failed
                logger.info(f'Skipping invalid {to_cls} user key: {to_id}')
                continue

            # If followee user is already direct, follower may not know they're
            # interacting with a bridge. if followee user is indirect though,
            # follower should know, so they're direct.
            to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj, direct=False)
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')
            obj.add('notify', to_key)
            from_cls.maybe_accept_follow(follower=from_user, followee=to_user,
                                         follow=obj)
1013

1014
    @classmethod
    def maybe_accept_follow(_, follower, followee, follow):
        """Sends an accept activity for a follow, on the followee's behalf.

        Does nothing if ``followee.HAS_FOLLOW_ACCEPTS`` is truthy.
        NOTE(review): presumably that's because the followee's own protocol
        will then produce the accept itself - confirm.

        Otherwise, creates an accept :class:`models.Object` whose actor is the
        followee and whose object is the follow, sends it to the follower's
        delivery target, and if the send reports success, marks the accept as
        delivered and complete.

        Args:
          follower: :class:`models.User`
          followee: :class:`models.User`
          follow: :class:`models.Object`
        """
        if followee.HAS_FOLLOW_ACCEPTS:
            return

        # the accept's id is derived from the followee's id and the follow's id
        followee_id = followee.key.id()
        accept_id = f'{followee_id}/followers#accept-{follow.key.id()}'
        accept = Object.get_or_create(accept_id, our_as1={
            'id': accept_id,
            'objectType': 'activity',
            'verb': 'accept',
            'actor': followee_id,
            'object': follow.as1,
        })

        from_target = follower.target_for(follower.obj)
        if not from_target:
            error(f"Couldn't find delivery target for follower {follower}")

        sent = follower.send(accept, from_target, from_user=followee)
        if sent:
            accept.populate(
                delivered=[Target(protocol=follower.LABEL, uri=from_target)],
                status='complete',
            )
            accept.put()
1050

1051
    @classmethod
    def bot_follow(bot_cls, user):
        """Follows a user from this protocol's bot user.

        Stores a new follow activity from the bot user to ``user`` and enqueues
        a ``send`` task to deliver it to ``user``'s target, so that the protocol
        starts sending us their activities, if it needs a follow for that
        (eg ActivityPub).

        Args:
          user (User)
        """
        from web import Web

        bot = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()
        bot_id = bot.key.id()
        user_id = user.key.id()
        logger.info(f'Following {user_id} back from bot user {bot_id}')

        target = user.target_for(user.obj)
        follow_id = f'https://{bot_id}/#follow-back-{user_id}-{timestamp}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_id,
            'actor': bot_id,
            'object': user_id,
        }
        follow = Object(id=follow_id, source_protocol='web',
                        undelivered=[Target(protocol=user.LABEL, uri=target)],
                        our_as1=follow_as1)
        # put() hands back the stored entity's key, which the task refers to
        follow_key = follow.put()

        common.create_task(
            queue='send',
            obj=follow_key.urlsafe(),
            url=target,
            protocol=user.LABEL,
            user=bot.key.urlsafe(),
        )
1081

1082
    @classmethod
    def maybe_delete_copy(copy_cls, user):
        """Deletes a user's copy actor in a given protocol.

        No-op unless ``copy_cls`` 's :attr:`Protocol.HAS_COPIES` is True and
        ``user`` actually has a copy in ``copy_cls``. Otherwise, stores a
        delete activity for the copy and enqueues a ``send`` task for it.

        TODO: this should eventually go through receive for protocols that need
        to deliver to all followers' targets, eg AP.

        Args:
          user (User)
        """
        if not copy_cls.HAS_COPIES:
            return

        copy_id = user.get_copy(copy_cls)
        if not copy_id:
            logger.warning(f"Tried to delete {user.key} copy for {copy_cls.LABEL}, which doesn't exist!")
            return

        label = copy_cls.LABEL
        user_id = user.key.id()
        timestamp = util.now().isoformat()
        delete_id = f'{user_id}#delete-copy-{label}-{timestamp}'
        delete_as1 = {
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': user_id,
            'object': copy_id,
        }
        delete = Object(id=delete_id, source_protocol=user.LABEL,
                        our_as1=delete_as1)

        # the delete goes to wherever the copy itself would be delivered
        target_uri = copy_cls.target_for(Object(id=copy_id))
        delete.undelivered = [Target(protocol=label, uri=target_uri)]
        delete.put()

        common.create_task(
            queue='send',
            obj=delete.key.urlsafe(),
            url=target_uri,
            protocol=label,
            user=user.key.urlsafe(),
        )
1120

1121
    @classmethod
    def handle_bare_object(cls, obj, authed_as=None):
        """Wraps a bare object in a create or update activity, if necessary.

        Objects that are neither actors nor notes, articles, or comments are
        returned as is. Actors and changed objects get wrapped in a new update
        activity. Anything else gets a create activity, unless we already have
        a complete one stored and nothing asks us to redo it, in which case
        this calls :func:`error` with status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        wrappable = is_actor or obj.type in ('note', 'article', 'comment')
        if not wrappable:
            return obj

        owner = as1.get_owner(obj.as1)
        timestamp = util.now().isoformat()
        obj_id = obj.key.id()

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            if obj.changed:
                logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')

            update_id = f'{obj_id}#bridgy-fed-update-{timestamp}'
            # Mastodon requires the updated field for Updates, so add a
            # default value, which the object's own value overrides.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            updated_obj = {'updated': timestamp, **obj.as1}
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': owner,
                'object': updated_obj,
            }
            logger.info(f'Wrapping in update: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        create_id = f'{obj_id}#bridgy-fed-create'
        existing = cls.load(create_id, remote=False)
        needs_create = (
            obj.new
            or not existing
            or existing.status != 'complete'
            # HACK: force query param here is specific to webmention
            or 'force' in request.form
        )
        if needs_create:
            if existing:
                logger.info(f'Existing create {existing.key} status {existing.status}')
            else:
                logger.info('No existing create activity')

            create_as1 = {
                'objectType': 'activity',
                'verb': 'post',
                'id': create_id,
                'actor': owner,
                'object': obj.as1,
                'published': timestamp,
            }
            logger.info(f'Wrapping in post: {json_dumps(create_as1, indent=2)}')
            return Object.get_or_create(create_id, our_as1=create_as1,
                                        source_protocol=obj.source_protocol,
                                        authed_as=authed_as)

        error(f'{obj_id} is unchanged, nothing to do', status=204)
1188

1189
    @classmethod
    def deliver(from_cls, obj, from_user):
        """Delivers an activity to its external recipients.

        Finds the delivery targets with :meth:`targets`, records them on
        ``obj`` as undelivered, stores ``obj``, and enqueues one ``send`` task
        per target. If there are no targets, marks ``obj`` as ignored, stores
        it, and calls :func:`error` with status 204.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from

        Returns:
          (str, int) tuple: ``('OK', 202)`` once the send tasks are enqueued
        """
        # find delivery targets. maps Target to Object or None
        targets = from_cls.targets(obj, from_user=from_user)

        if not targets:
            obj.status = 'ignored'
            obj.put()
            error(r'No targets, nothing to do ¯\_(ツ)_/¯', status=204)

        # sort targets so order is deterministic for tests, debugging, etc
        sorted_targets = sorted(targets.items(), key=lambda t: t[0].uri)
        obj.populate(
            status='in progress',
            delivered=[],
            failed=[],
            undelivered=[t for t, _ in sorted_targets],
        )
        obj.put()
        logger.info(f'Delivering to: {obj.undelivered}')

        # enqueue a send task for each target. the activity and user keys are
        # the same for every task, so serialize them once.
        obj_key = obj.key.urlsafe()
        user_key = from_user.key.urlsafe()
        for target, orig_obj in sorted_targets:
            orig_obj_key = orig_obj.key.urlsafe() if orig_obj else ''
            common.create_task(queue='send', obj=obj_key,
                               url=target.uri, protocol=target.protocol,
                               orig_obj=orig_obj_key, user=user_key)

        return 'OK', 202
1225

1226
    @classmethod
    def targets(cls, obj, from_user):
        """Collects the targets to send a :class:`models.Object` to.

        Targets are both objects - original posts, events, etc - and actors.

        Works in three passes:

        1. direct targets: the ids that ``as1.targets`` extracts from the
           activity, skipping same-protocol and blocklisted ones
        2. followers of the activity's actor, for posts, updates, deletes, and
           shares that aren't replies (or are self replies)
        3. de-duping by URL and dropping targets on the activity's own domain(s)

        Side effects visible here: adds recipient user keys to ``obj``'s
        ``notify`` property, and adds follower keys to the ``feed`` property of
        the object that should show up in followers' feeds, then stores it.

        Args:
          obj (models.Object)
          from_user (User)

        Returns:
          dict: maps :class:`models.Target` to original (in response to)
          :class:`models.Object`, if any, otherwise None
        """
        logger.info('Finding recipients and their targets')

        target_uris = sorted(set(as1.targets(obj.as1)))
        logger.info(f'Raw targets: {target_uris}')
        orig_obj = None
        targets = {}  # maps Target to Object or None
        owner = as1.get_owner(obj.as1)

        in_reply_to_protocols = set()  # protocol kinds, eg 'MagicKey'
        in_reply_to_owners = []
        in_reply_tos = as1.get_ids(as1.get_object(obj.as1), 'inReplyTo')
        for in_reply_to in in_reply_tos:
            if proto := Protocol.for_id(in_reply_to):
                if in_reply_to_obj := proto.load(in_reply_to):
                    if proto.LABEL != obj.source_protocol:
                        in_reply_to_protocols.add(proto._get_kind())
                    else:
                        # NOTE(review): this branch used to also compute
                        # proto.DEFAULT_ENABLED_PROTOCOLS plus the copies'
                        # protocol labels, but never used the result. Only the
                        # copies' protocols have ever been added here; confirm
                        # whether the default enabled protocols should be too.
                        in_reply_to_protocols.update(PROTOCOLS[c.protocol]._get_kind()
                                                     for c in in_reply_to_obj.copies)

                    if reply_owner := as1.get_owner(in_reply_to_obj.as1):
                        in_reply_to_owners.append(reply_owner)

        is_reply = obj.type == 'comment' or in_reply_tos
        is_self_reply = False
        if is_reply:
            logger.info("It's a reply...")
            if in_reply_to_protocols:
                logger.info(f'  ...delivering to these protocols where the in-reply-to post was native or bridged: {in_reply_to_protocols}')
            else:
                logger.info("  ...skipping, in-reply-to post(s) are same protocol and weren't bridged anywhere")
                return {}

        # target_uris is already sorted above
        for target_id in target_uris:
            protocol = Protocol.for_id(target_id)
            if not protocol:
                logger.info(f"Can't determine protocol for {target_id}")
                continue
            elif protocol.is_blocklisted(target_id):
                logger.info(f'{target_id} is blocklisted')
                continue

            # note that orig_obj deliberately outlives this loop; the follower
            # pass below reuses the last one loaded for reposts
            orig_obj = protocol.load(target_id)
            if not orig_obj or not orig_obj.as1:
                logger.info(f"Couldn't load {target_id}")
                continue

            # deliver self-replies to followers
            # https://github.com/snarfed/bridgy-fed/issues/639
            if owner == as1.get_owner(orig_obj.as1):
                is_self_reply = True
                logger.info('Looks like a self reply! Delivering to followers')

            if protocol == cls and cls.LABEL != 'fake':
                logger.info(f'Skipping same-protocol target {target_id}')
                continue
            elif target_id in in_reply_to_owners:
                logger.info('Skipping mention of in-reply-to author')
                continue

            target = protocol.target_for(orig_obj)
            if not target:
                # TODO: surface errors like this somehow?
                logger.error(f"Can't find delivery target for {target_id}")
                continue

            logger.info(f'Target for {target_id} is {target}')
            targets[Target(protocol=protocol.LABEL, uri=target)] = orig_obj
            orig_user = protocol.actor_key(orig_obj)
            if orig_user:
                logger.info(f'Recipient is {orig_user}')
                obj.add('notify', orig_user)

        logger.info(f'Direct targets: {targets.keys()}')

        # deliver to followers, if appropriate
        user_key = cls.actor_key(obj)
        if not user_key:
            logger.info("Can't tell who this is from! Skipping followers.")
            return targets

        if (obj.type in ('post', 'update', 'delete', 'share')
                and (not is_reply or (is_self_reply and in_reply_to_protocols))):
            logger.info(f'Delivering to followers of {user_key}')
            followers = [f for f in Follower.query(Follower.to == user_key,
                                                   Follower.status == 'active')
                         # skip protocol bot users
                         if not Protocol.for_bridgy_subdomain(f.from_.id())]
            user_keys = [f.from_ for f in followers]
            if is_reply:
                user_keys = [k for k in user_keys if k.kind() in in_reply_to_protocols]
            users = [u for u in ndb.get_multi(user_keys) if u]
            User.load_multi(users)

            # which object should we add to followers' feeds, if any
            feed_obj = None
            if obj.type == 'share':
                feed_obj = obj
            else:
                inner = as1.get_object(obj.as1)
                # don't add profile updates to feeds
                if not (obj.type == 'update'
                        and inner.get('objectType') in as1.ACTOR_TYPES):
                    inner_id = inner.get('id')
                    if inner_id:
                        feed_obj = cls.load(inner_id)

            # include ATProto if this user is enabled there.
            # TODO: abstract across protocols. maybe with this, below
            # targets.update({
            #     Target(protocol=proto.LABEL,
            #            uri=proto.target_for(proto.bot_user_id())): None
            #     for proto in PROTOCOLS
            #     if proto and proto.HAS_COPIES
            # })

            if 'atproto' in from_user.enabled_protocols:
                if (not followers and
                    (util.domain_or_parent_in(
                        util.domain_from_link(from_user.key.id()), LIMITED_DOMAINS)
                     or util.domain_or_parent_in(
                         util.domain_from_link(obj.key.id()), LIMITED_DOMAINS))):
                    logger.info(f'skipping ATProto, {from_user.key.id()} is on a limited domain and has no followers')

                elif (is_reply or obj.type == 'share') and not targets:
                    logger.info("skipping ATProto, repost of or reply to post that wasn't bridged")

                else:
                    from atproto import ATProto
                    targets.setdefault(
                        Target(protocol=ATProto.LABEL, uri=ATProto.PDS_URL), None)
                    logger.info(f'user has ATProto enabled, adding {ATProto.PDS_URL}')

            for user in users:
                if feed_obj:
                    feed_obj.add('feed', user.key)

                # TODO: should we pass remote=False through here to Protocol.load?
                target = user.target_for(user.obj, shared=True) if user.obj else None
                if not target:
                    # TODO: surface errors like this somehow?
                    logger.error(f'Follower {user.key} has no delivery target')
                    continue

                # normalize URL (lower case hostname, etc)
                # ...but preserve our PDS URL without trailing slash in path
                # https://atproto.com/specs/did#did-documents
                target = util.dedupe_urls([target], trailing_slash=False)[0]

                # HACK: use last target object from above for reposts, which
                # has its resolved id
                targets[Target(protocol=user.LABEL, uri=target)] = \
                    orig_obj if obj.type == 'share' else None

            if feed_obj:
                feed_obj.put()

        # de-dupe targets, discard same-domain
        # maps string target URL to (Target, Object) tuple
        candidates = {t.uri: (t, orig) for t, orig in targets.items()}
        # maps Target to Object or None
        targets = {}
        source_domains = [
            util.domain_from_link(url) for url in
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
            if util.is_web(url)
        ]
        for url in sorted(util.dedupe_urls(
                candidates.keys(),
                # preserve our PDS URL without trailing slash in path
                # https://atproto.com/specs/did#did-documents
                trailing_slash=False)):
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
                logger.info(f'Skipping same-domain target {url}')
            else:
                # use separate names here so that the obj parameter isn't
                # rebound to a target's original object
                target, orig = candidates[url]
                targets[target] = orig

        return targets
1421

1422
    @classmethod
    def load(cls, id, remote=None, local=True, **kwargs):
        """Loads and returns an Object from the datastore or a remote fetch.

        Sets the :attr:`new` and :attr:`changed` attributes if we know either
        one for the loaded object, ie local is True and remote is True or None.

        After a successful remote fetch, resolves and normalizes the object's
        ids, sets its source protocol to this protocol if it isn't already, and
        stores it.

        Args:
          id (str)
          remote (bool): whether to fetch the object over the network. If True,
            fetches even if we already have the object stored, and updates our
            stored copy. If False and we don't have the object stored, returns
            None. Default (None) means to fetch over the network only if we
            don't already have it stored, or if our stored copy was last
            updated more than ``OBJECT_REFRESH_AGE`` ago.
          local (bool): whether to load from the datastore before
            fetching over the network. If False, still stores back to the
            datastore after a successful remote fetch.
          kwargs: passed through to :meth:`fetch()`

        Returns:
          models.Object: loaded object, or None if it isn't fetchable, eg a
          non-URL string for Web, if ``remote`` is False and it isn't in the
          datastore, or if the fetched object is over
          ``models.MAX_ENTITY_SIZE`` when serialized

        Raises:
          requests.HTTPError: anything that :meth:`fetch` raises
        """
        assert id
        assert local or remote is not False

        obj = orig_as1 = None
        if local:
            obj = Object.get_by_id(id)
            if obj and (obj.as1 or obj.raw or obj.deleted):
                obj.new = False

        if remote is False:
            return obj

        if remote is None and obj:
            # only refetch stored objects once they're old enough
            is_stale = obj.updated < util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not is_stale:
                return obj

        if obj:
            # remember the stored contents so that we can tell below whether
            # the fetched contents are different
            orig_as1 = obj.as1
            obj.clear()
            obj.new = False
        else:
            obj = Object(id=id)
            if local:
                # we looked in the datastore and didn't find it
                obj.new = True
                obj.changed = False

        fetched = cls.fetch(obj, **kwargs)
        if not fetched:
            return None

        # https://stackoverflow.com/a/3042250/186123
        size = len(_entity_to_protobuf(obj)._pb.SerializeToString())
        if size > models.MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
            return None

        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(orig_as1)

        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
1508

1509

1510
@cloud_tasks_only
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Loads the object named by the ``obj`` form parameter, marks it as new,
    and hands it to :meth:`Protocol.receive` on the protocol class registered
    for the object's ``source_protocol``.

    Parameters:
      obj (url-safe google.cloud.ndb.key.Key): :class:`models.Object` to handle
      authed_as (str): passed to :meth:`Protocol.receive`

    Returns:
      whatever :meth:`Protocol.receive` returns. If it raises
      :class:`ValueError`, the exception is logged as a warning and passed to
      :func:`common.error` with status 304.

    TODO: migrate incoming webmentions and AP inbox deliveries to this. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention` and :func:`activitypub.inbox`, eg
    :class:`models.Object` with ``new`` and ``changed``, HTTP request details,
    etc. See stash for attempt at this for :class:`web.Web`.
    """
    params = request.form.to_dict()
    logger.info(f'Params: {list(params.items())}')

    # the stored object must already exist; this handler always treats it as new
    obj = ndb.Key(urlsafe=params['obj']).get()
    assert obj
    obj.new = True

    authed_as = params.get('authed_as')

    # the request counts as internal when it's authorized as the primary
    # domain or as one of the protocol domains
    if authed_as == common.PRIMARY_DOMAIN:
        internal = True
    else:
        internal = authed_as in common.PROTOCOL_DOMAINS

    try:
        protocol = PROTOCOLS[obj.source_protocol]
        return protocol.receive(obj=obj, authed_as=authed_as, internal=internal)
    except ValueError as err:
        logger.warning(err, exc_info=True)
        error(err, status=304)
1✔
1543

1544

1545
@cloud_tasks_only
def send_task():
    """Task handler that delivers one activity to one destination.

    Calls :meth:`Protocol.send` with the form parameters, then records the
    outcome on the stored :class:`models.Object` inside a transaction.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj (url-safe google.cloud.ndb.key.Key): :class:`models.Object` to send
      orig_obj (url-safe google.cloud.ndb.key.Key): optional "original object"
        :class:`models.Object` that this object refers to, eg replies to or
        reposts or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from

    Returns:
      (str, int) tuple: response body and HTTP status. 204 if ``url`` or
      ``protocol`` is missing, or if the target is in neither the object's
      ``undelivered`` nor ``failed`` list and ``force`` isn't in the request.
      Otherwise 200 if the send returned a truthy value, 304 if not.
    """
    params = request.form.to_dict()
    logger.info(f'Params: {list(params.items())}')

    # prepare
    url = params.get('url')
    protocol = params.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    target = Target(uri=url, protocol=protocol)

    obj = ndb.Key(urlsafe=params['obj']).get()
    pending = target in obj.undelivered or target in obj.failed
    if not pending and 'force' not in request.values:
        logger.info(f"{url} not in {obj.key.id()} undelivered or failed, giving up")
        return r'¯\_(ツ)_/¯', 204

    user_key = params.get('user')
    user = ndb.Key(urlsafe=user_key).get() if user_key else None

    orig_obj = None
    if orig_obj_key := params.get('orig_obj'):
        orig_obj = ndb.Key(urlsafe=orig_obj_key).get()

    # send
    logger.info(f'Sending {obj.key.id()} AS1: {json_dumps(obj.as1, indent=2)}')
    # sent stays None if the send raises; that's recorded as a failure below
    sent = None
    try:
        sent = PROTOCOLS[protocol].send(obj, url, from_user=user, orig_obj=orig_obj)
    except BaseException as err:
        code, body = util.interpret_http_exception(err)
        if not (code or body):
            logger.info(str(err), exc_info=True)

    if sent is False:
        logger.info(f'Failed sending {obj.key.id()} to {url}')

    # write results to Object
    @ndb.transactional()
    def record_result(obj_key):
        # re-fetch inside the transaction instead of reusing the outer obj
        stored = obj_key.get()
        if target in stored.undelivered:
            stored.remove('undelivered', target)

        if sent is None:
            stored.add('failed', target)
        else:
            if target in stored.failed:
                stored.remove('failed', target)
            if sent:
                stored.add('delivered', target)

        # once no targets are left undelivered, set the overall status
        if not stored.undelivered:
            if stored.delivered:
                stored.status = 'complete'
            elif stored.failed:
                stored.status = 'failed'
            else:
                stored.status = 'ignored'

        stored.put()

    record_result(obj.key)

    status = 200 if sent else 304
    return '', status
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc