• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / a62808aa-5996-4422-8905-839df0c498c8

29 Jan 2025 07:57PM UTC coverage: 93.218%. Remained the same
a62808aa-5996-4422-8905-839df0c498c8

push

circleci

snarfed
cache /convert/ and /r/ endpoints in memcache

...since GAE's edge caching based on Cache-Control doesn't seem to be very effective :/

for #1149

7 of 7 new or added lines in 2 files covered. (100.0%)

58 existing lines in 5 files now uncovered.

4536 of 4866 relevant lines covered (93.22%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.87
/protocol.py
1
"""Base protocol class and common code."""
2
import copy
1✔
3
from datetime import datetime, timedelta
1✔
4
import logging
1✔
5
import os
1✔
6
import re
1✔
7
from threading import Lock
1✔
8
from urllib.parse import urljoin, urlparse
1✔
9

10
from cachetools import cached, LRUCache
1✔
11
from flask import request
1✔
12
from google.cloud import ndb
1✔
13
from google.cloud.ndb import OR
1✔
14
from google.cloud.ndb.model import _entity_to_protobuf
1✔
15
from granary import as1, as2
1✔
16
from granary.source import html_to_text
1✔
17
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
18
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
19
from oauth_dropins.webutil import models
1✔
20
from oauth_dropins.webutil import util
1✔
21
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
22
from requests import RequestException
1✔
23
import werkzeug.exceptions
1✔
24
from werkzeug.exceptions import BadGateway, HTTPException
1✔
25

26
import common
1✔
27
from common import (
1✔
28
    DOMAIN_BLOCKLIST,
29
    DOMAIN_RE,
30
    DOMAINS,
31
    PRIMARY_DOMAIN,
32
    PROTOCOL_DOMAINS,
33
    report_error,
34
    subdomain_wrap,
35
)
36
import dms
1✔
37
import ids
1✔
38
from ids import (
1✔
39
    BOT_ACTOR_AP_IDS,
40
    normalize_user_id,
41
    translate_object_id,
42
    translate_user_id,
43
)
44
import memcache
1✔
45
from models import (
1✔
46
    DM,
47
    Follower,
48
    Object,
49
    PROTOCOLS,
50
    PROTOCOLS_BY_KIND,
51
    Target,
52
    User,
53
)
54

55
OBJECT_REFRESH_AGE = timedelta(days=30)
DELETE_TASK_DELAY = timedelta(minutes=2)
CREATE_MAX_AGE = timedelta(weeks=2)

# require a follow for users on these domains before we deliver anything from
# them other than their profile.
#
# the LIMITED_DOMAINS environment variable (whitespace-separated) wins if it's
# set and non-empty; otherwise we fall back to the limited_domains file.
_limited_domains_from_env = os.getenv('LIMITED_DOMAINS', '').split()
LIMITED_DOMAINS = (_limited_domains_from_env
                   or util.load_file_lines('limited_domains'))

# AS1 verbs that are excluded from STORE_AS1_TYPES below
DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | {
    'accept',
    'reject',
    'stop-following',
    'undo',
}
# note that - binds tighter than |, so DONT_STORE_AS1_TYPES is only subtracted
# from as1.VERBS_WITH_OBJECT. the parentheses here just make that explicit.
STORE_AS1_TYPES = (as1.ACTOR_TYPES
                   | as1.POST_TYPES
                   | (as1.VERBS_WITH_OBJECT - DONT_STORE_AS1_TYPES))

logger = logging.getLogger(__name__)
1✔
74

75

76
def error(*args, status=299, **kwargs):
    """Wraps :func:`common.error`, defaulting the HTTP status code to 299.

    The 299 default is there to prevent the task from being retried.

    Args:
      args: passed through to :func:`common.error`
      status (int): HTTP status code
      kwargs: passed through to :func:`common.error`

    Returns:
      whatever :func:`common.error` returns
    """
    kwargs['status'] = status
    return common.error(*args, **kwargs)
1✔
79

80

81
class ErrorButDoNotRetryTask(HTTPException):
    """HTTP exception with status code 299.

    299 is the status code that :func:`error` defaults to in order to prevent
    a task from being retried.
    """
    code = 299
    description = 'ErrorButDoNotRetryTask'
1✔
84

85
# Register ErrorButDoNotRetryTask for status code 299 in werkzeug's exception
# lookup tables, unless something is already registered for that code.
# Background:
# https://github.com/pallets/flask/issues/1837#issuecomment-304996942
#
# NOTE(review): _aborter is a private werkzeug attribute; confirm it still
# exists when upgrading werkzeug.
werkzeug.exceptions.default_exceptions.setdefault(299, ErrorButDoNotRetryTask)
werkzeug.exceptions._aborter.mapping.setdefault(299, ErrorButDoNotRetryTask)
1✔
88

89

90
def activity_id_memcache_key(id):
    """Returns the memcache key for an activity id.

    The id is prefixed with ``receive-`` and then passed to
    :func:`memcache.key`.

    Args:
      id (str): activity id

    Returns:
      whatever :func:`memcache.key` returns
    """
    prefixed = f'receive-{id}'
    return memcache.key(prefixed)
1✔
92

93

94
class Protocol:
1✔
95
    """Base protocol class. Not to be instantiated; classmethods only.
96

97
    Attributes:
98
      LABEL (str): human-readable lower case name
99
      OTHER_LABELS (list of str): label aliases
100
      ABBREV (str): lower case abbreviation, used in URL paths
101
      PHRASE (str): human-readable name or phrase. Used in phrases like
102
        ``Follow this person on {PHRASE}``
103
      LOGO_HTML (str): logo emoji or ``<img>`` tag
104
      CONTENT_TYPE (str): MIME type of this protocol's native data format,
105
        appropriate for the ``Content-Type`` HTTP header.
106
      HAS_COPIES (bool): whether this protocol is push and needs us to
107
        proactively create "copy" users and objects, as opposed to pulling
108
        converted objects on demand
109
      REQUIRES_AVATAR (bool): whether accounts on this protocol are required
110
        to have a profile picture. If they don't, their ``User.status`` will be
111
        ``blocked``.
112
      REQUIRES_NAME (bool): whether accounts on this protocol are required to
113
        have a profile name that's different than their handle or id. If they
114
        don't, their ``User.status`` will be ``blocked``.
115
      REQUIRES_OLD_ACCOUNT (bool): whether accounts on this protocol are
116
        required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their
117
        profile includes creation date and it's not old enough, their
118
        ``User.status`` will be ``blocked``.
119
      DEFAULT_ENABLED_PROTOCOLS (sequence of str): labels of other protocols
120
        that are automatically enabled for this protocol to bridge into
121
      DEFAULT_SERVE_USER_PAGES (bool): whether to serve user pages for all of
122
        this protocol's users on fed.brid.gy. If ``False``, user pages will
123
        only be served for users who have explicitly opted in.
124
      SUPPORTED_AS1_TYPES (sequence of str): AS1 objectTypes and verbs that this
125
        protocol supports receiving and sending.
126
      SUPPORTS_DMS (bool): whether this protocol can receive DMs (chat messages)
127

128
    """
129
    ABBREV = None
1✔
130
    PHRASE = None
1✔
131
    OTHER_LABELS = ()
1✔
132
    LOGO_HTML = ''
1✔
133
    CONTENT_TYPE = None
1✔
134
    HAS_COPIES = False
1✔
135
    REQUIRES_AVATAR = False
1✔
136
    REQUIRES_NAME = False
1✔
137
    REQUIRES_OLD_ACCOUNT = False
1✔
138
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
139
    DEFAULT_SERVE_USER_PAGES = False
1✔
140
    SUPPORTED_AS1_TYPES = ()
1✔
141
    SUPPORTS_DMS = False
1✔
142

143
    def __init__(self):
1✔
UNCOV
144
        assert False
×
145

146
    @classmethod
1✔
147
    @property
1✔
148
    def LABEL(cls):
1✔
149
        return cls.__name__.lower()
1✔
150

151
    @staticmethod
1✔
152
    def for_request(fed=None):
        """Returns the protocol for the current request.

        Looks at the current Flask request's host and delegates to
        :meth:`for_bridgy_subdomain`.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request hostname is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        host = request.host
        return Protocol.for_bridgy_subdomain(host, fed=fed)
1✔
166

167
    @staticmethod
1✔
168
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Returns the protocol for a brid.gy subdomain.

        Args:
          domain_or_url (str): bare domain, or a web URL whose domain is used
          fed (str or protocol.Protocol): protocol to return if the domain is
            the primary domain (``fed.brid.gy``) or a local domain

        Returns:
          class: :class:`Protocol` subclass, or None if the provided domain is
          not a subdomain of ``brid.gy`` or isn't a known protocol
        """
        if util.is_web(domain_or_url):
            domain = util.domain_from_link(domain_or_url, minimize=False)
        else:
            domain = domain_or_url

        # primary/local domains don't identify a protocol; defer to fed
        if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        # per-protocol subdomain: strip the superdomain, look up what's left
        if domain and domain.endswith(common.SUPERDOMAIN):
            subdomain_label = domain.removesuffix(common.SUPERDOMAIN)
            return PROTOCOLS.get(subdomain_label)

        return None
1✔
190

191
    @classmethod
1✔
192
    def owns_id(cls, id):
        """Returns whether this protocol owns the id, or None if it's unclear.

        To be implemented by subclasses. The base implementation here always
        returns False.

        IDs are string identities that uniquely identify users, and are intended
        primarily to be machine readable and usable. Compare to handles, which
        are human-chosen, human-meaningful, and often but not always unique.

        Some protocols' ids are more or less deterministic based on the id
        format, eg AT Protocol owns ``at://`` URIs. Others, like http(s) URLs,
        could be owned by eg Web or ActivityPub.

        This should be a quick guess without expensive side effects, eg no
        external HTTP fetches to fetch the id itself or otherwise perform
        discovery.

        Returns False if the id's domain is in :const:`common.DOMAIN_BLOCKLIST`.

        Args:
          id (str)

        Returns:
          bool or None: True if this protocol definitely owns the id, False if
          it definitely doesn't, None if it's unclear
        """
        return False
1✔
218

219
    @classmethod
1✔
220
    def owns_handle(cls, handle, allow_internal=False):
        """Returns whether this protocol owns the handle, or None if it's unclear.

        To be implemented by subclasses. The base implementation here ignores
        its arguments and always returns False.

        Handles are string identities that are human-chosen, human-meaningful,
        and often but not always unique. Compare to IDs, which uniquely identify
        users, and are intended primarily to be machine readable and usable.

        Some protocols' handles are more or less deterministic based on the id
        format, eg ActivityPub (technically WebFinger) handles are
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
        ActivityPub, AT Protocol, or others.

        This should be a quick guess without expensive side effects, eg no
        external HTTP fetches to fetch the id itself or otherwise perform
        discovery.

        Args:
          handle (str)
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          bool or None: True if this protocol definitely owns the handle, False
          if it definitely doesn't, None if it's unclear
        """
        return False
1✔
247

248
    @classmethod
1✔
249
    def handle_to_id(cls, handle):
        """Converts a handle to an id.

        To be implemented by subclasses.

        May incur network requests, eg DNS queries or HTTP requests. Avoids
        blocked or opted out users.

        Args:
          handle (str)

        Returns:
          str: corresponding id, or None if the handle can't be found

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
264

265
    @classmethod
1✔
266
    def key_for(cls, id, allow_opt_out=False):
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

        To be implemented by subclasses. Canonicalizes the id if necessary.

        If called via `Protocol.key_for`, infers the appropriate protocol with
        :meth:`for_id` and delegates to it. If called with a concrete subclass,
        uses that subclass as is.

        Args:
          id (str):
          allow_opt_out (bool): whether to allow users who are currently opted out

        Returns:
          google.cloud.ndb.Key: matching key, or None if the given id is not a
          valid :class:`User` id for this protocol, or if the existing user has
          a ``status`` set and ``allow_opt_out`` is False.
        """
        if cls == Protocol:
            inferred = Protocol.for_id(id)
            if not inferred:
                return None
            return inferred.key_for(id, allow_opt_out=allow_opt_out)

        # load user so that we follow use_instead
        existing = cls.get_by_id(id, allow_opt_out=True)
        if not existing:
            # no stored user; build the key from a new, unsaved entity
            return cls(id=id).key

        if existing.status and not allow_opt_out:
            return None

        return existing.key
1✔
295

296
    @staticmethod
1✔
297
    def _for_id_memcache_key(id, remote=None):
        """Key function for :meth:`for_id`'s memcache memoization.

        If ``remote`` is truthy and ``id`` is a web URL, returns the URL's
        domain. Otherwise falls off the end and implicitly returns None.

        Args:
          id (str)
          remote (bool): the ``remote`` argument that :meth:`for_id` was
            called with

        Returns:
          str or None: the id's domain, from :func:`util.domain_from_link`
        """
        if remote and util.is_web(id):
            return util.domain_from_link(id)
1✔
308

309
    @cached(LRUCache(20000), lock=Lock())
1✔
310
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote: remote,
1✔
311
                      version=3)
312
    @staticmethod
1✔
313
    def for_id(id, remote=True):
        """Returns the protocol for a given id.

        Tries these steps in order, and returns as soon as one is conclusive:

        1. if the id is a web URL on one of our per-protocol subdomains
        2. if any protocol's :meth:`owns_id` claims the id, or exactly one
           protocol says it's unclear (ie returns None)
        3. if an :class:`Object` with this id is already stored, with a
           ``source_protocol``
        4. if ``remote`` is True, tries loading the id over the network with
           each remaining candidate protocol

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.debug(f'Determining protocol for id {id}')
        if not id:
            return None

        # remove our synthetic id fragment, if any
        #
        # will this eventually cause false positives for other services that
        # include our full ids inside their own ids, non-URL-encoded? guess
        # we'll figure that out if/when it happens.
        id = id.partition('#bridgy-fed-')[0]

        if util.is_web(id):
            # step 1: check for our per-protocol subdomains
            try:
                # homepage means the URL path is empty or only slashes
                is_homepage = urlparse(id).path.strip('/') == ''
            except ValueError as e:
                logger.info(f'urlparse ValueError: {e}')
                return None

            by_subdomain = Protocol.for_bridgy_subdomain(id)
            # subdomain homepages and bot actor ids fall through to the
            # later steps instead of being claimed by the subdomain's protocol
            if by_subdomain and not is_homepage and id not in BOT_ACTOR_AP_IDS:
                logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
                return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it
        # sort to be deterministic
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
                           key=lambda p: p.LABEL)
        candidates = []
        for protocol in protocols:
            owns = protocol.owns_id(id)
            if owns:
                logger.debug(f'  {protocol.LABEL} owns id {id}')
                return protocol
            elif owns is not False:
                # owns_id returned None, ie unclear. keep as a candidate
                candidates.append(protocol)

        if len(candidates) == 1:
            logger.debug(f'  {candidates[0].LABEL} owns id {id}')
            return candidates[0]

        # step 3: look for existing Objects in the datastore
        obj = Protocol.load(id, remote=False)
        if obj and obj.source_protocol:
            logger.debug(f'  {obj.key.id()} owned by source_protocol {obj.source_protocol}')
            return PROTOCOLS[obj.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for protocol in candidates:
            logger.debug(f'Trying {protocol.LABEL}')
            try:
                obj = protocol.load(id, local=False, remote=True)

                if protocol.ABBREV == 'web':
                    # for web, if we fetch and get HTML without microformats,
                    # load returns False but the object will be stored in the
                    # datastore with source_protocol web, and in cache. load it
                    # again manually to check for that.
                    obj = Object.get_by_id(id)
                    if obj and obj.source_protocol != 'web':
                        obj = None

                if obj:
                    logger.debug(f'  {protocol.LABEL} owns id {id}')
                    return protocol
            except BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except HTTPException as e:
                # internal error we generated ourselves; try next protocol
                pass
            except Exception as e:
                # only swallow exceptions that map to an HTTP status code;
                # anything else is unexpected and propagates
                code, _ = util.interpret_http_exception(e)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
1✔
409

410
    @cached(LRUCache(20000), lock=Lock())
1✔
411
    @staticmethod
1✔
412
    def for_handle(handle):
        """Returns the protocol for a given handle.

        May incur expensive side effects like resolving the handle itself over
        the network or other discovery.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle,
          or if the handle belongs to a stored user whose ``status`` is set
        """
        # TODO: normalize, eg convert domains to lower case
        logger.debug(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: check if any Protocols say conclusively that they own it.
        # sort to be deterministic.
        distinct_protos = set(p for p in PROTOCOLS.values() if p)
        maybes = []
        for proto in sorted(distinct_protos, key=lambda p: p.LABEL):
            verdict = proto.owns_handle(handle)
            if verdict:
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)
            if verdict is not False:
                # None means unclear; keep this protocol as a candidate
                maybes.append(proto)

        if len(maybes) == 1:
            only = maybes[0]
            logger.debug(f'  {only.LABEL} owns handle {handle}')
            return (only, None)

        # step 2: look for matching User in the datastore
        for proto in maybes:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue
            if user.status:
                return (None, None)
            logger.debug(f'  user {user.key} handle {handle}')
            return (proto, user.key.id())

        # step 3: resolve handle to id
        for proto in maybes:
            resolved_id = proto.handle_to_id(handle)
            if resolved_id:
                logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {resolved_id}')
                return (proto, resolved_id)

        logger.info(f'No matching protocol found for handle {handle} !')
        return (None, None)
1✔
465

466
    @classmethod
1✔
467
    def bridged_web_url_for(cls, user, fallback=False):
1✔
468
        """Returns the web URL for a user's bridged profile in this protocol.
469

470
        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
471
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``
472

473
        Args:
474
          user (models.User)
475
          fallback (bool): if True, and bridged users have no canonical user
476
            profile URL in this protocol, return the native protocol's profile URL
477

478
        Returns:
479
          str, or None if there isn't a canonical URL
480
        """
481
        if fallback:
1✔
482
            return user.web_url()
1✔
483

484
    @classmethod
1✔
485
    def actor_key(cls, obj, allow_opt_out=False):
        """Returns the :class:`User` key for a given object's author or actor.

        Args:
          obj (models.Object)
          allow_opt_out (bool): whether to return a user key if they're opted out

        Returns:
          google.cloud.ndb.key.Key or None: None if the object has no owner, or
          if :meth:`key_for` returns None for the owner
        """
        owner = as1.get_owner(obj.as1)
        if not owner:
            return None
        return cls.key_for(owner, allow_opt_out=allow_opt_out)
1✔
498

499
    @classmethod
1✔
500
    def bot_user_id(cls):
        """Returns the Web user id for the bot user for this protocol.

        Built from this protocol's ``ABBREV`` followed by
        :const:`common.SUPERDOMAIN`. For example, ``'bsky.brid.gy'`` for ATProto.

        Returns:
          str:
        """
        abbrev = cls.ABBREV
        return f'{abbrev}{common.SUPERDOMAIN}'
1✔
509

510
    @classmethod
1✔
511
    def create_for(cls, user):
        """Creates or re-activates a copy user in this protocol.

        Should add the copy user to :attr:`copies`.

        If the copy user already exists and is active, should do nothing.

        Args:
          user (models.User): original source user. Shouldn't already have a
            copy user for this protocol in :attr:`copies`.

        Raises:
          ValueError: if we can't create a copy of the given user in this protocol
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
526

527
    @classmethod
1✔
528
    def send(to_cls, obj, url, from_user=None, orig_obj_id=None):
        """Sends an outgoing activity.

        To be implemented by subclasses.

        NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates
        a copy and sends it, it *must* add that copy to the *object*'s (not
        activity's) :attr:`copies`!

        Args:
          obj (models.Object): with activity to send
          url (str): destination URL to send to
          from_user (models.User): user (actor) this activity is from
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
            that this object refers to, eg replies to or reposts or likes

        Returns:
          bool: True if the activity is sent successfully, False if it is
          ignored or otherwise unsent due to protocol logic, eg no webmention
          endpoint, protocol doesn't support the activity type. (Failures are
          raised as exceptions.)

        Raises:
          werkzeug.HTTPException: if the request fails
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
554

555
    @classmethod
1✔
556
    def fetch(cls, obj, **kwargs):
        """Fetches a protocol-specific object and populates it in an :class:`Object`.

        Errors are raised as exceptions. If this method returns False, the fetch
        didn't fail but didn't succeed either, eg the id isn't valid for this
        protocol, or the fetch didn't return valid data for this protocol.

        To be implemented by subclasses.

        Args:
          obj (models.Object): with the id to fetch. Data is filled into one of
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
          kwargs: subclass-specific

        Returns:
          bool: True if the object was fetched and populated successfully,
          False otherwise

        Raises:
          requests.RequestException or werkzeug.HTTPException: if the fetch fails
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
578

579
    @classmethod
1✔
580
    def convert(cls, obj, from_user=None, **kwargs):
1✔
581
        """Converts an :class:`Object` to this protocol's data format.
582

583
        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
584
        and ``application/activity+json`` for :class:`ActivityPub`.
585

586
        Just passes through to :meth:`_convert`, then does minor
587
        protocol-independent postprocessing.
588

589
        Args:
590
          obj (models.Object):
591
          from_user (models.User): user (actor) this activity/object is from
592
          kwargs: protocol-specific, passed through to :meth:`_convert`
593

594
        Returns:
595
          converted object in the protocol's native format, often a dict
596
        """
597
        if not obj or not obj.as1:
1✔
598
            return {}
1✔
599

600
        id = obj.key.id() if obj.key else obj.as1.get('id')
1✔
601
        is_activity = obj.as1.get('verb') in ('post', 'update')
1✔
602
        base_obj = as1.get_object(obj.as1) if is_activity else obj.as1
1✔
603
        orig_our_as1 = obj.our_as1
1✔
604

605
        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
606
        if (from_user and base_obj
1✔
607
            and base_obj.get('objectType') in as1.ACTOR_TYPES
608
            and PROTOCOLS.get(obj.source_protocol) != cls
609
            and Protocol.for_bridgy_subdomain(id) not in DOMAINS
610
            # Web users are special cased, they don't get the label if they've
611
            # explicitly enabled Bridgy Fed with redirects or webmentions
612
            and not (from_user.LABEL == 'web'
613
                     and (from_user.last_webmention_in or from_user.has_redirects))):
614

615
            obj.our_as1 = copy.deepcopy(obj.as1)
1✔
616
            actor = as1.get_object(obj.as1) if is_activity else obj.as1
1✔
617
            actor['objectType'] = 'person'
1✔
618
            cls.add_source_links(actor=actor, obj=obj, from_user=from_user)
1✔
619

620
        converted = cls._convert(obj, from_user=from_user, **kwargs)
1✔
621
        obj.our_as1 = orig_our_as1
1✔
622
        return converted
1✔
623

624
    @classmethod
1✔
625
    def _convert(cls, obj, from_user=None, **kwargs):
        """Converts an :class:`Object` to this protocol's data format.

        To be implemented by subclasses. Implementations should generally call
        :meth:`Protocol.translate_ids` (as their own class) before converting to
        their format.

        Args:
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
          kwargs: protocol-specific

        Returns:
          converted object in the protocol's native format, often a dict. May
            return the ``{}`` empty dict if the object can't be converted.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
642

643
    @classmethod
1✔
644
    def add_source_links(cls, actor, obj, from_user):
        """Adds "bridged from ... by Bridgy Fed" HTML to ``actor['summary']``.

        Default implementation; subclasses may override. Modifies ``actor`` in
        place. Does nothing if the summary's text already contains
        ``Bridgy Fed]``.

        Args:
          actor (dict): AS1 actor
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
        """
        assert from_user
        summary = actor.setdefault('summary', '')
        already_labeled = 'Bridgy Fed]' in html_to_text(summary, ignore_links=True)
        if already_labeled:
            return

        actor_id = actor.get('id')

        proto_phrase = ''
        if obj.source_protocol:
            proto_phrase = PROTOCOLS[obj.source_protocol].PHRASE
        if proto_phrase:
            proto_phrase = f' on {proto_phrase}'

        # if the actor is from_user, link to their user page and their profile
        is_from_user = (from_user.key
                        and actor_id in (from_user.key.id(), from_user.profile_id()))
        if is_from_user:
            source_links = f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a> from <a href="{from_user.web_url()}">{from_user.handle}</a>{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
        else:
            url = as1.get_url(actor) or actor_id
            if url:
                source = util.pretty_link(url)
            else:
                source = '?'
            source_links = f'[bridged from {source}{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'

        # separate the label from any existing bio text
        if summary:
            summary += '<br><br>'
        actor['summary'] = summary + source_links
1✔
676

677
    @classmethod
1✔
678
    def set_username(to_cls, user, username):
        """Sets a custom username for a user's bridged account in this protocol.

        This base implementation always raises :class:`NotImplementedError`.

        Args:
          user (models.User)
          username (str)

        Raises:
          ValueError: if the username is invalid
          RuntimeError: if the username could not be set
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
1✔
690

691
    @classmethod
1✔
692
    def target_for(cls, obj, shared=False):
        """Returns an :class:`Object`'s delivery target (endpoint).

        To be implemented by subclasses.

        Examples:

        * If obj has ``source_protocol`` ``web``, returns its URL, as a
          webmention target.
        * If obj is an ``activitypub`` actor, returns its inbox.
        * If obj is an ``activitypub`` object, returns its author's or actor's
          inbox.

        Args:
          obj (models.Object):
          shared (bool): optional. If True, returns a common/shared
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
            multiple recipients for efficiency

        Returns:
          str: target endpoint, or None if not available.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
715

716
    @classmethod
1✔
717
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        Default implementation here, subclasses may override.

        Checks the URL's domain, and its parent domains, against
        :const:`common.DOMAIN_BLOCKLIST`, plus :const:`common.DOMAINS` unless
        ``allow_internal`` is True.

        Args:
          url (str):
          allow_internal (bool): whether to return False for internal domains
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc

        Returns:
          whatever :func:`util.domain_or_parent_in` returns
        """
        # build a new tuple instead of using += on the module-level constant.
        # if DOMAIN_BLOCKLIST were ever a list, += would extend it in place and
        # permanently add our own domains to the shared blocklist.
        blocklist = tuple(DOMAIN_BLOCKLIST)
        if not allow_internal:
            blocklist = blocklist + tuple(DOMAINS)
        return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
1✔
731

732
    @classmethod
1✔
733
    def translate_ids(to_cls, obj):
        """Translates all ids in an AS1 object to a specific protocol.

        The source protocol is inferred separately for each id value.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``bcc``
        * ``bto``
        * ``cc``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``object.object``
        * ``attachments[].id``
        * ``tags[objectType=mention].url``
        * ``to``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        Args:
          to_cls (Protocol subclass): protocol to translate ids into
          obj (dict): AS1 object or activity (not :class:`models.Object`!)

        Returns:
          dict: wrapped AS1 version of ``obj``. ``obj`` itself is not modified;
          a deep copy is translated instead.
        """
        assert to_cls != Protocol
        if not obj:
            return obj

        def collapse(items):
            # dicts that only carry an id become the bare id; a one-element
            # list becomes that single element
            compacted = [item['id'] if item.keys() == {'id'} else item
                         for item in items]
            return compacted[0] if len(compacted) == 1 else compacted

        def translate(elem, field, fn, uri=False):
            # normalize the field to a list of dicts, translate each id in
            # place, then collapse back to the most compact form
            entries = as1.get_objects(elem, field)
            elem[field] = entries
            is_audience_field = field in ('to', 'cc', 'bcc', 'bto')

            for entry in entries:
                orig_id = entry.get('id')
                if not orig_id:
                    continue
                if is_audience_field and as1.is_audience(orig_id):
                    # audience targets are left untouched
                    continue

                from_cls = Protocol.for_id(orig_id)
                # TODO: what if from_cls is None? relax translate_object_id,
                # make it a noop if we don't know enough about from/to?
                if from_cls and from_cls != to_cls:
                    entry['id'] = fn(id=orig_id, from_=from_cls, to=to_cls)
                if entry['id'] and uri:
                    entry['id'] = to_cls(id=entry['id']).id_uri()

            elem[field] = collapse(entries)

        outer_obj = copy.deepcopy(obj)
        inner_objs = as1.get_objects(outer_obj)
        outer_obj['object'] = inner_objs

        outer_type = as1.object_type(outer_obj)
        if outer_type in as1.ACTOR_TYPES:
            translate(outer_obj, 'id', translate_user_id)
        else:
            translate(outer_obj, 'id', translate_object_id)

        for inner in inner_objs:
            is_actor = (as1.object_type(inner) in as1.ACTOR_TYPES
                        or as1.get_owner(outer_obj) == inner.get('id')
                        or outer_type in ('follow', 'stop-following'))
            id_fn = translate_user_id if is_actor else translate_object_id
            translate(inner, 'id', id_fn)

            if inner.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT:
                translate(inner, 'object', translate_user_id)
            else:
                translate(inner, 'object', translate_object_id)

        for node in [outer_obj, *inner_objs]:
            translate(node, 'inReplyTo', translate_object_id)

            for user_field in ('actor', 'author', 'to', 'cc', 'bto', 'bcc'):
                translate(node, user_field, translate_user_id)

            for tag in as1.get_objects(node, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', translate_user_id, uri=True)

            for att in as1.get_objects(node, 'attachments'):
                translate(att, 'id', translate_object_id)
                url = att.get('url')
                if not url or att.get('id'):
                    continue
                # no id of its own: derive one from the attachment's url
                url_cls = Protocol.for_id(url)
                if url_cls:
                    att['id'] = translate_object_id(from_=url_cls, to=to_cls,
                                                    id=url)

        outer_obj = util.trim_nulls(outer_obj)

        remaining = util.get_list(outer_obj, 'object')
        if remaining:
            outer_obj['object'] = collapse(remaining)

        return outer_obj
1✔
837

838
    @classmethod
1✔
839
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
        """Handles an incoming activity.

        Checks the activity's id, visibility, and authorization, stores it
        (when its type is in ``STORE_AS1_TYPES``), applies verb-specific
        handling for ``stop-following``, ``delete``/``undo``, ``block``,
        ``post`` DMs, and ``follow``, and finally hands it to :meth:`deliver`.

        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
        unset, the request is rejected via ``error('No id provided')``.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity.
            Required in practice: the method asserts that it is a non-empty
            string.
          internal (bool): whether to allow activity ids on internal domains,
            from opted out/blocked users, etc.
          received_at (datetime): when we first saw (received) this activity.
            Right now only used for monitoring.

        Returns:
          (str, int) tuple: (response body, HTTP status code) Flask response

        Raises:
          werkzeug.HTTPException: if the request is invalid
        """
        # check some invariants
        assert from_cls != Protocol
        assert isinstance(obj, Object), obj

        if not obj.as1:
            error('No object data provided')

        id = None
        if obj.key and obj.key.id():
            id = obj.key.id()

        if not id:
            id = obj.as1.get('id')
            obj.key = ndb.Key(Object, id)

        if not id:
            error('No id provided')
        elif from_cls.owns_id(id) is False:
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
        elif from_cls.is_blocklisted(id, allow_internal=internal):
            error(f'Activity {id} is blocklisted')
        # check that this activity is public. only do this for some activities,
        # not eg likes or follows, since Mastodon doesn't currently mark those
        # as explicitly public.
        elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
                and not as1.is_public(obj.as1, unlisted=False)
                and not as1.is_dm(obj.as1)):
            logger.info('Dropping non-public activity')
            return ('OK', 200)

        # lease this object, atomically
        memcache_key = activity_id_memcache_key(id)
        leased = memcache.memcache.add(memcache_key, 'leased', noreply=False,
                                       expire=5 * 60)  # 5 min
        # short circuit if we've already seen this activity id.
        # (don't do this for bare objects since we need to check further down
        # whether they've been updated since we saw them last.)
        if (obj.as1.get('objectType') == 'activity'
            and 'force' not in request.values
            and (not leased
                 or (obj.new is False and obj.changed is False))):
            error(f'Already seen this activity {id}', status=204)

        # drop bulky fields from the logged copy only; obj.as1 is unchanged
        pruned = {k: v for k, v in obj.as1.items()
                  if k not in ('contentMap', 'replies', 'signature')}
        delay = ''
        if (received_at and request.headers.get('X-AppEngine-TaskRetryCount') == '0'
                and obj.type != 'delete'):  # we delay deletes for 2m
            delay_s = int((util.now().replace(tzinfo=None)
                           - received_at.replace(tzinfo=None)
                           ).total_seconds())
            delay = f'({delay_s} s behind)'
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')

        # does this protocol support this activity/object type?
        from_cls.check_supported(obj)

        # check authorization
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
        actor = as1.get_owner(obj.as1)
        if not actor:
            error('Activity missing actor or author')
        elif from_cls.owns_id(actor) is False:
            error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)

        assert authed_as
        assert isinstance(authed_as, str)
        authed_as = normalize_user_id(id=authed_as, proto=from_cls)
        actor = normalize_user_id(id=actor, proto=from_cls)
        if actor != authed_as:
            report_error("Auth: receive: authed_as doesn't match owner",
                         user=f'{id} authed_as {authed_as} owner {actor}')
            error(f"actor {actor} isn't authed user {authed_as}")

        # update copy ids to originals
        obj.normalize_ids()
        obj.resolve_ids()

        if (obj.type == 'follow'
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
            # follows of bot user; refresh user profile first
            logger.info(f'Follow of bot user, reloading {actor}')
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
            from_user.reload_profile()
        else:
            # load actor user
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=internal)

        if not internal and (not from_user
                             or from_user.manual_opt_out
                             # we want to override opt-out but not manual or blocked
                             or (from_user.status and from_user.status != 'opt-out')):
            error(f'Actor {actor} is opted out or blocked', status=204)

        # if this is an object, ie not an activity, wrap it in a create or update
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
        obj.add('users', from_user.key)

        inner_obj_as1 = as1.get_object(obj.as1)
        inner_obj_id = inner_obj_as1.get('id')
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
            if not inner_obj_id:
                error(f'{obj.type} object has no id!')

        # check age. we support backdated posts, but if they're over 2w old, we
        # don't deliver them
        if obj.type == 'post':
            if published := inner_obj_as1.get('published'):
                try:
                    age = util.now() - util.parse_iso8601(published)
                    if age > CREATE_MAX_AGE:
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
                              status=204)
                except ValueError:  # from parse_iso8601
                    logger.debug(f"Couldn't parse published {published}")

        # write Object to datastore
        obj.source_protocol = from_cls.LABEL
        if obj.type in STORE_AS1_TYPES:
            obj.put()

        # store inner object
        # TODO: unify with big obj.type conditional below. would have to merge
        # this with the DM handling block lower down.
        crud_obj = None
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
            crud_obj = Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
                                            source_protocol=from_cls.LABEL,
                                            authed_as=actor, users=[from_user.key])

        # NOTE: from here on, ``actor`` is the AS1 actor *dict*, no longer the
        # normalized actor id string used above
        actor = as1.get_object(obj.as1, 'actor')
        actor_id = actor.get('id')

        # handle activity!
        if obj.type == 'stop-following':
            # TODO: unify with handle_follow?
            # TODO: handle multiple followees
            if not actor_id or not inner_obj_id:
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')

            # deactivate Follower
            from_ = from_cls.key_for(actor_id)
            to_cls = Protocol.for_id(inner_obj_id)
            to = to_cls.key_for(inner_obj_id)
            follower = Follower.query(Follower.to == to,
                                      Follower.from_ == from_,
                                      Follower.status == 'active').get()
            if follower:
                logger.info(f'Marking {follower} inactive')
                follower.status = 'inactive'
                follower.put()
            else:
                logger.warning(f'No Follower found for {from_} => {to}')

            # fall through to deliver to followee
            # TODO: do we convert stop-following to webmention 410 of original
            # follow?

        elif obj.type in ('delete', 'undo'):
            delete_obj_id = (from_user.profile_id()
                             if inner_obj_id == from_user.key.id()
                             else inner_obj_id)

            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
            if not delete_obj:
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
                return 'OK', 204

            # TODO: just delete altogether!
            logger.info(f'Marking Object {delete_obj_id} deleted')
            delete_obj.deleted = True
            delete_obj.put()

            # if this is an actor, handle deleting it later so that
            # in case it's from_user, user.enabled_protocols is still populated
            #
            # fall through to deliver to followers and delete copy if necessary.
            # should happen via protocol-specific copy target and send of
            # delete activity.
            # https://github.com/snarfed/bridgy-fed/issues/63

        elif obj.type == 'block':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # blocking protocol bot user disables that protocol
                from_user.delete(proto)
                from_user.disable_protocol(proto)
                return 'OK', 200

        elif obj.type == 'post':
            # handle DMs to bot users
            if as1.is_dm(obj.as1):
                return dms.receive(from_user=from_user, obj=obj)

        # fetch actor if necessary
        if (actor and actor.keys() == set(['id'])
                and obj.type not in ('delete', 'undo')):
            logger.debug('Fetching actor so we have name, profile photo, etc')
            actor_obj = from_cls.load(ids.profile_id(id=actor['id'], proto=from_cls),
                                      raise_=False)
            if actor_obj and actor_obj.as1:
                obj.our_as1 = {
                    **obj.as1, 'actor': {
                        **actor_obj.as1,
                        # override profile id with actor id
                        # https://github.com/snarfed/bridgy-fed/issues/1720
                        'id': actor['id'],
                    }
                }

        # fetch object if necessary
        if (obj.type in ('post', 'update', 'share')
                and inner_obj_as1.keys() == set(['id'])
                and from_cls.owns_id(inner_obj_id)):
            logger.debug('Fetching inner object')
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
                                      remote=(obj.type in ('post', 'update')))
            if obj.type in ('post', 'update'):
                crud_obj = inner_obj
            if inner_obj and inner_obj.as1:
                obj.our_as1 = {
                    **obj.as1,
                    'object': {
                        **inner_obj_as1,
                        **inner_obj.as1,
                    }
                }
            elif obj.type in ('post', 'update'):
                # this was a plain string before, so the literal text
                # "{inner_obj_id}" was reported instead of the actual id
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")

        if obj.type == 'follow':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # follow of one of our protocol bot users; enable that protocol.
                # fall through so that we send an accept.
                from_user.enable_protocol(proto)
                proto.bot_follow(from_user)

            from_cls.handle_follow(obj)

        # deliver to targets
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)

        # if this is a user, deactivate its followers/followings
        # https://github.com/snarfed/bridgy-fed/issues/1304
        if obj.type == 'delete':
            if user_key := from_cls.key_for(id=inner_obj_id):
                if user := user_key.get():
                    for proto in user.enabled_protocols:
                        user.disable_protocol(PROTOCOLS[proto])

                    logger.info(f'Deactivating Followers from or to {user_key.id()}')
                    followers = Follower.query(
                        OR(Follower.to == user_key, Follower.from_ == user_key)
                        ).fetch()
                    for f in followers:
                        f.status = 'inactive'
                    ndb.put_multi(followers)

        # replace the short 'leased' marker with a longer-lived 'done' one
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
        return resp
1✔
1120

1121
    @classmethod
1✔
1122
    def handle_follow(from_cls, obj):
        """Handles an incoming follow activity.

        Loads (and creates if necessary) the follower and each followee user,
        stores a :class:`Follower` for each cross-protocol followee, and sends
        an ``Accept`` back via :meth:`maybe_accept_follow`. Doesn't send the
        ``Follow`` itself; that happens in :meth:`deliver`.

        Followees on the same protocol as the follower, and followees for
        which ``key_for`` returns nothing, are skipped.

        Args:
          obj (models.Object): follow activity. Its ``users`` property is set
            to the follower's key, and each followee's key is added to
            ``notify``.
        """
        logger.debug('Got follow. Loading users, storing Follow(s), sending accept(s)')

        # Prepare follower (from) users' data
        # TODO: remove all of this and just use from_user
        from_as1 = as1.get_object(obj.as1, 'actor')
        from_id = from_as1.get('id')
        if not from_id:
            error(f'Follow activity requires actor. Got: {obj.as1}')

        from_obj = from_cls.load(from_id, raise_=False)
        if not from_obj:
            error(f"Couldn't load {from_id}", status=502)

        if not from_obj.as1:
            # nothing stored for the follower yet; fall back to the actor data
            # embedded in the follow activity
            from_obj.our_as1 = from_as1
            from_obj.put()

        from_key = from_cls.key_for(from_id)
        if not from_key:
            error(f'Invalid {from_cls.LABEL} user key: {from_id}')
        obj.users = [from_key]
        from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls:
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_obj = to_cls.load(to_id)
            if to_obj and not to_obj.as1:
                to_obj.our_as1 = to_as1
                to_obj.put()

            to_key = to_cls.key_for(to_id)
            if not to_key:
                # report the followee here; this used to log the follower's
                # protocol and id, which pointed at the wrong user
                logger.info(f'Skipping invalid {to_cls.LABEL} user key: {to_id}')
                continue

            to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj,
                                           allow_opt_out=True)
            # called for its side effect; the returned Follower isn't needed
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')
            obj.add('notify', to_key)
            from_cls.maybe_accept_follow(follower=from_user, followee=to_user,
                                         follow=obj)
1191

1192
    @classmethod
1✔
1193
    def maybe_accept_follow(_, follower, followee, follow):
        """Sends an accept activity for a follow.

        ...if the follower protocol handles accepts, ie ``'accept'`` is in the
        follower's ``SUPPORTED_AS1_TYPES``. Otherwise, does nothing.

        The accept is enqueued as a ``send`` task rather than sent inline.

        Args:
          follower: :class:`models.User`
          followee: :class:`models.User`
          follow: :class:`models.Object`
        """
        accepts_supported = 'accept' in follower.SUPPORTED_AS1_TYPES
        if not accepts_supported:
            return

        target = follower.target_for(follower.obj)
        if not target:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # send accept. note that this is one accept for the whole
        # follow, even if it has multiple followees!
        accept_id = f'{followee.key.id()}/followers#accept-{follow.key.id()}'
        accept_as1 = {
            'id': accept_id,
            'objectType': 'activity',
            'verb': 'accept',
            'actor': followee.key.id(),
            'object': follow.as1,
        }
        common.create_task(
            queue='send',
            id=accept_id,
            our_as1=accept_as1,
            url=target,
            protocol=follower.LABEL,
            user=followee.key.urlsafe(),
        )
1222

1223
    @classmethod
1✔
1224
    def bot_follow(bot_cls, user):
        """Follow a user from a protocol bot user.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub).

        Enqueues a ``send`` task carrying the follow activity. If ``user`` has
        no profile object, only logs and returns.

        Args:
          user (User)
        """
        # imported here rather than at module level, presumably to avoid a
        # circular import - TODO confirm
        from web import Web

        bot_user = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()
        logger.info(f'Following {user.key.id()} back from bot user {bot_user.key.id()}')

        if user.obj:
            target = user.target_for(user.obj)
            # the timestamp goes into the id, so each call gets a distinct one
            follow_id = (f'https://{bot_user.key.id()}/#follow-back-'
                         f'{user.key.id()}-{timestamp}')
            follow_as1 = {
                'objectType': 'activity',
                'verb': 'follow',
                'id': follow_id,
                'actor': bot_user.key.id(),
                'object': user.key.id(),
            }
            common.create_task(
                queue='send',
                id=follow_id,
                our_as1=follow_as1,
                url=target,
                source_protocol='web',
                protocol=user.LABEL,
                user=bot_user.key.urlsafe(),
            )
        else:
            logger.info("  can't follow, user has no profile obj")
1255

1256
    @classmethod
1✔
1257
    def handle_bare_object(cls, obj, authed_as=None):
        """If obj is a bare object, wraps it in a create or update activity.

        Only actors, notes, articles, and comments count as bare objects here;
        anything else is returned untouched. Actors and changed objects are
        wrapped in an ``update``; new objects (or any object when ``force`` is
        in the request form) are wrapped in a ``post``. Otherwise the request
        is ended via ``error`` with status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity
            (not used by this implementation)

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        owner_id = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        timestamp = util.now().isoformat()

        # occasionally we override the object, eg if this is a profile object
        # coming in via a user with use_instead set
        inner_as1 = obj.as1
        if ((key_id := obj.key.id())
                and (as1_id := inner_as1.get('id'))
                and key_id != as1_id):
            logger.info(f'Overriding AS1 object id {as1_id} with Object id {key_id}')
            inner_as1['id'] = key_id

        def wrap(verb, activity_id, inner, **extra):
            # builds the wrapping activity, logs it, and returns it as a new
            # Object. key order matches the original literal dicts.
            activity_as1 = {
                'objectType': 'activity',
                'verb': verb,
                'id': activity_id,
                'actor': owner_id,
                'object': inner,
                **extra,
            }
            logger.debug(f'  AS1: {json_dumps(activity_as1, indent=2)}')
            return Object(id=activity_id, our_as1=activity_as1,
                          source_protocol=obj.source_protocol)

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            logger.info(
                f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes'
                if obj.changed
                else f'Got actor profile object, wrapping in update')
            # Mastodon requires the updated field for Updates, so
            # add a default value.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            return wrap('update', f'{obj.key.id()}#bridgy-fed-update-{timestamp}',
                        {'updated': timestamp, **inner_as1})

        # HACK: force query param here is specific to webmention
        if obj.new or 'force' in request.form:
            create_id = f'{obj.key.id()}#bridgy-fed-create'
            logger.info(f'Wrapping in post')
            return wrap('post', create_id, inner_as1, published=timestamp)

        error(f'{obj.key.id()} is unchanged, nothing to do', status=204)
1✔
1329

1330
    @classmethod
1✔
1331
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Delivers an activity to its external recipients.

        Collects targets with :meth:`targets`, stores whichever object that
        call marked dirty, then enqueues one ``send`` task per target.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response. 204 if there are no targets at
          all, otherwise 202 once the tasks are enqueued.
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # find delivery targets. maps Target to Object or None
        #
        # ...then write the relevant object, since targets() has a side effect of
        # setting the notify and feed properties (and dirty attribute)
        targets = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if not targets:
            return r'No targets, nothing to do ¯\_(ツ)_/¯', 204

        # store object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif obj.type in STORE_AS1_TYPES and obj.dirty:
            obj.put()

        # stored activities are passed to the task by id; others are
        # serialized with obj.to_request()
        obj_params = ({'obj_id': obj.key.id()} if obj.type in STORE_AS1_TYPES
                      else obj.to_request())

        # sort targets so order is deterministic for tests, debugging, etc
        sorted_targets = sorted(targets.items(), key=lambda t: t[0].uri)

        # enqueue send task for each target. note that this log line lists all
        # targets, including any that the to_proto filter below skips.
        logger.info(f'Delivering to: {[t for t, _ in sorted_targets]}')
        user = from_user.key.urlsafe()
        for target, orig_obj in sorted_targets:
            if to_proto and target.protocol != to_proto.LABEL:
                continue
            orig_obj_id = orig_obj.key.id() if orig_obj else None
            common.create_task(queue='send', url=target.uri, protocol=target.protocol,
                               orig_obj_id=orig_obj_id, user=user, **obj_params)

        return 'OK', 202
1✔
1380

1381
    @classmethod
1✔
1382
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1383
        """Collects the targets to send a :class:`models.Object` to.
1384

1385
        Targets are both objects - original posts, events, etc - and actors.
1386

1387
        Args:
1388
          obj (models.Object)
1389
          from_user (User)
1390
          crud_obj (models.Object): if this is a create, update, or delete/undo
1391
            activity, the inner object that's being written, otherwise None.
1392
            (This object's ``notify`` and ``feed`` properties may be updated.)
1393
          internal (bool): whether this is a recursive internal call
1394

1395
        Returns:
1396
          dict: maps :class:`models.Target` to original (in response to)
1397
          :class:`models.Object`, if any, otherwise None
1398
        """
1399
        logger.debug('Finding recipients and their targets')
1✔
1400

1401
        # we should only have crud_obj iff this is a create or update
1402
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1403
        write_obj = crud_obj or obj
1✔
1404
        write_obj.dirty = False
1✔
1405

1406
        target_uris = sorted(set(as1.targets(obj.as1)))
1✔
1407
        logger.info(f'Raw targets: {target_uris}')
1✔
1408
        orig_obj = None
1✔
1409
        targets = {}  # maps Target to Object or None
1✔
1410
        owner = as1.get_owner(obj.as1)
1✔
1411
        allow_opt_out = (obj.type == 'delete')
1✔
1412
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1413
        inner_obj_id = inner_obj_as1.get('id')
1✔
1414
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1415
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1416
        is_self_reply = False
1✔
1417

1418
        if is_reply:
1✔
1419
            original_ids = in_reply_tos
1✔
1420
        else:
1421
            if inner_obj_id == from_user.key.id():
1✔
1422
                inner_obj_id = from_user.profile_id()
1✔
1423
            original_ids = [inner_obj_id]
1✔
1424

1425
        # which protocols should we allow delivering to?
1426
        to_protocols = []
1✔
1427
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1428
                      + from_user.enabled_protocols):
1429
            proto = PROTOCOLS[label]
1✔
1430
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1431
                                     or is_reply):
1432
                for id in original_ids:
1✔
1433
                    if Protocol.for_id(id) == proto:
1✔
1434
                        logger.info(f'Allowing {label} for original post {id}')
1✔
1435
                        break
1✔
1436
                    elif orig := from_user.load(id, remote=False):
1✔
1437
                        if orig.get_copy(proto):
1✔
1438
                            logger.info(f'Allowing {label}, original post {id} was bridged there')
1✔
1439
                            break
1✔
1440
                else:
1441
                    logger.info(f"Skipping {label}, original objects {original_ids} weren't bridged there")
1✔
1442
                    continue
1✔
1443

1444
            util.add(to_protocols, proto)
1✔
1445

1446
        # process direct targets
1447
        for id in sorted(target_uris):
1✔
1448
            target_proto = Protocol.for_id(id)
1✔
1449
            if not target_proto:
1✔
1450
                logger.info(f"Can't determine protocol for {id}")
1✔
1451
                continue
1✔
1452
            elif target_proto.is_blocklisted(id):
1✔
1453
                logger.debug(f'{id} is blocklisted')
1✔
1454
                continue
1✔
1455

1456
            orig_obj = target_proto.load(id, raise_=False)
1✔
1457
            if not orig_obj or not orig_obj.as1:
1✔
1458
                logger.info(f"Couldn't load {id}")
1✔
1459
                continue
1✔
1460

1461
            target_author_key = target_proto.actor_key(orig_obj)
1✔
1462
            if not from_user.is_enabled(target_proto):
1✔
1463
                # if author isn't bridged and inReplyTo author is, DM a prompt
1464
                if id in in_reply_tos:
1✔
1465
                    if target_author := target_author_key.get():
1✔
1466
                        if target_author.is_enabled(from_cls):
1✔
1467
                            dms.maybe_send(
1✔
1468
                                from_proto=target_proto, to_user=from_user,
1469
                                type='replied_to_bridged_user', text=f"""\
1470
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently replied</a> to {orig_obj.actor_link(image=False)}, who's bridged here from {target_proto.PHRASE}. If you want them to see your replies, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1471

1472
                continue
1✔
1473

1474
            # deliver self-replies to followers
1475
            # https://github.com/snarfed/bridgy-fed/issues/639
1476
            if id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1477
                is_self_reply = True
1✔
1478
                logger.info(f'self reply!')
1✔
1479

1480
            # also add copies' targets
1481
            for copy in orig_obj.copies:
1✔
1482
                proto = PROTOCOLS[copy.protocol]
1✔
1483
                if proto in to_protocols:
1✔
1484
                    # copies generally won't have their own Objects
1485
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1486
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {id}')
1✔
1487
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1488

1489
            if target_proto == from_cls:
1✔
1490
                logger.debug(f'Skipping same-protocol target {id}')
1✔
1491
                continue
1✔
1492

1493
            target = target_proto.target_for(orig_obj)
1✔
1494
            if not target:
1✔
1495
                # TODO: surface errors like this somehow?
UNCOV
1496
                logger.error(f"Can't find delivery target for {id}")
×
UNCOV
1497
                continue
×
1498

1499
            logger.debug(f'Target for {id} is {target}')
1✔
1500
            # only use orig_obj for inReplyTos, like/repost objects, etc
1501
            # https://github.com/snarfed/bridgy-fed/issues/1237
1502
            targets[Target(protocol=target_proto.LABEL, uri=target)] = (
1✔
1503
                orig_obj if id in in_reply_tos or id in as1.get_ids(obj.as1, 'object')
1504
                else None)
1505

1506
            if target_author_key:
1✔
1507
                logger.debug(f'Recipient is {target_author_key}')
1✔
1508
                if write_obj.add('notify', target_author_key):
1✔
1509
                    write_obj.dirty = True
1✔
1510

1511
        if obj.type == 'undo':
1✔
1512
            logger.debug('Object is an undo; adding targets for inner object')
1✔
1513
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
1514
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
1515
            else:
1516
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
1517
            if inner_obj:
1✔
1518
                targets.update(from_cls.targets(inner_obj, from_user=from_user,
1✔
1519
                                                internal=True))
1520

1521
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
1522

1523
        # deliver to followers, if appropriate
1524
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
1525
        if not user_key:
1✔
1526
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
1527
            return targets
1✔
1528

1529
        followers = []
1✔
1530
        if (obj.type in ('post', 'update', 'delete', 'share', 'undo')
1✔
1531
                and (not is_reply or is_self_reply)):
1532
            logger.info(f'Delivering to followers of {user_key}')
1✔
1533
            followers = [
1✔
1534
                f for f in Follower.query(Follower.to == user_key,
1535
                                          Follower.status == 'active')
1536
                # skip protocol bot users
1537
                if not Protocol.for_bridgy_subdomain(f.from_.id())
1538
                # skip protocols this user hasn't enabled, or where the base
1539
                # object of this activity hasn't been bridged
1540
                and PROTOCOLS_BY_KIND[f.from_.kind()] in to_protocols]
1541
            user_keys = [f.from_ for f in followers]
1✔
1542
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
1543
            User.load_multi(users)
1✔
1544

1545
            if (not followers and
1✔
1546
                (util.domain_or_parent_in(
1547
                    util.domain_from_link(from_user.key.id()), LIMITED_DOMAINS)
1548
                 or util.domain_or_parent_in(
1549
                     util.domain_from_link(obj.key.id()), LIMITED_DOMAINS))):
1550
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
1551
                return {}
1✔
1552

1553
            # add to followers' feeds, if any
1554
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
1555
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
1556
                    write_obj.feed = [u.key for u in users]
1✔
1557
                    if write_obj.feed:
1✔
1558
                        write_obj.dirty = True
1✔
1559

1560
            # collect targets for followers
1561
            for user in users:
1✔
1562
                # TODO: should we pass remote=False through here to Protocol.load?
1563
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
1564
                if not target:
1✔
1565
                    # TODO: surface errors like this somehow?
1566
                    logger.error(f'Follower {user.key} has no delivery target')
1✔
1567
                    continue
1✔
1568

1569
                # normalize URL (lower case hostname, etc)
1570
                # ...but preserve our PDS URL without trailing slash in path
1571
                # https://atproto.com/specs/did#did-documents
1572
                target = util.dedupe_urls([target], trailing_slash=False)[0]
1✔
1573

1574
                targets[Target(protocol=user.LABEL, uri=target)] = \
1✔
1575
                    Object.get_by_id(inner_obj_id) if obj.type == 'share' else None
1576

1577
        # deliver to enabled HAS_COPIES protocols proactively
1578
        # TODO: abstract for other protocols
1579
        from atproto import ATProto
1✔
1580
        if (ATProto in to_protocols
1✔
1581
                and obj.type in ('post', 'update', 'delete', 'share')):
1582
            logger.info(f'user has ATProto enabled, adding {ATProto.PDS_URL}')
1✔
1583
            targets.setdefault(
1✔
1584
                Target(protocol=ATProto.LABEL, uri=ATProto.PDS_URL), None)
1585

1586
        # de-dupe targets, discard same-domain
1587
        # maps string target URL to (Target, Object) tuple
1588
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
1589
        # maps Target to Object or None
1590
        targets = {}
1✔
1591
        source_domains = [
1✔
1592
            util.domain_from_link(url) for url in
1593
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
1594
            if util.is_web(url)
1595
        ]
1596
        for url in sorted(util.dedupe_urls(
1✔
1597
                candidates.keys(),
1598
                # preserve our PDS URL without trailing slash in path
1599
                # https://atproto.com/specs/did#did-documents
1600
                trailing_slash=False)):
1601
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
UNCOV
1602
                logger.info(f'Skipping same-domain target {url}')
×
UNCOV
1603
                continue
×
1604
            target, obj = candidates[url]
1✔
1605
            targets[target] = obj
1✔
1606

1607
        return targets
1✔
1608

1609
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, **kwargs):
        """Returns the :class:`models.Object` for ``id``, from datastore or network.

        When the object is fetched over the network, it is stored back to the
        datastore before being returned. The :attr:`new` and :attr:`changed`
        attributes are set when we know them for the loaded object, ie
        ``local`` is True and ``remote`` is True or None.

        Args:
          id (str)
          remote (bool): controls network fetching. True always fetches, even
            if we already have the object stored, and updates the stored copy.
            False never fetches; if the object isn't stored, None is returned.
            None (the default) fetches only if the object isn't stored or its
            stored copy is older than ``OBJECT_REFRESH_AGE``.
          local (bool): whether to look in the datastore before fetching. If
            False, a successful fetch is still written to the datastore.
          raise_ (bool): if False, any :class:`requests.RequestException` or
            :class:`werkzeug.exceptions.HTTPException` raised by
            :meth:`fetch` is caught and ``None`` is returned instead
          kwargs: passed through to :meth:`fetch`

        Returns:
          models.Object: the loaded object, or None if :meth:`fetch` returned
          a falsy value, the fetched entity is over
          ``models.MAX_ENTITY_SIZE``, or ``remote`` is False and the object
          isn't in the datastore

        Raises:
          requests.RequestException or werkzeug.exceptions.HTTPException:
            whatever :meth:`fetch` raises, if ``raise_`` is True
        """
        assert id
        assert local or remote is not False

        # step 1: datastore lookup, if requested
        orig_as1 = None
        obj = Object.get_by_id(id) if local else None
        if obj and (obj.as1 or obj.raw or obj.deleted):
            # the stored entity has real content
            obj.new = False

        # step 2: decide whether the stored copy is good enough
        if remote is False:
            return obj
        if remote is None and obj:
            is_stale = obj.updated < util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not is_stale:
                return obj
            # otherwise fall through and refresh over the network

        # step 3: prepare the entity that fetch() will populate
        if not obj:
            obj = Object(id=id)
            if local:
                # we looked in the datastore and it wasn't there
                obj.new = True
                obj.changed = False
        else:
            # remember the old contents so we can detect changes afterward
            orig_as1 = obj.as1
            obj.our_as1 = None
            obj.new = False

        # step 4: fetch
        try:
            fetched = cls.fetch(obj, **kwargs)
        except (RequestException, HTTPException) as e:
            if not raise_:
                util.interpret_http_exception(e)
                return None
            raise

        if not fetched:
            return None

        # step 5: refuse to store entities that are too large
        # https://stackoverflow.com/a/3042250/186123
        size = len(_entity_to_protobuf(obj)._pb.SerializeToString())
        if size > models.MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
            return None

        # step 6: clean up, record provenance, and store
        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(orig_as1)

        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
1704

1705
    @classmethod
    def check_supported(cls, obj):
        """Rejects ``obj`` with status 204 if this protocol can't handle it.

        An object is rejected via ``error(..., status=204)`` when:

        * its type isn't in ``cls.SUPPORTED_AS1_TYPES``, or
        * it's a CRUD activity whose inner object has a type that isn't in
          ``cls.SUPPORTED_AS1_TYPES``, or
        * it's a DM and either this protocol doesn't support DMs or neither
          the recipient nor the owner is a protocol bot account.

        Objects without a type are accepted without any checks.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)
        """
        if not obj.type:
            return

        supported = cls.SUPPORTED_AS1_TYPES
        inner_type = as1.object_type(as1.get_object(obj.as1)) or ''

        # outer type first, then the inner object's type for CRUD activities
        unsupported = obj.type not in supported
        if not unsupported and obj.type in as1.CRUD_VERBS:
            unsupported = bool(inner_type) and inner_type not in supported

        if unsupported:
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # DMs are only allowed to/from protocol bot accounts
        recip = as1.recipient_if_dm(obj.as1)
        if not recip:
            return

        protocol_user_ids = PROTOCOL_DOMAINS + common.protocol_user_copy_ids()
        if cls.SUPPORTS_DMS and (recip in protocol_user_ids
                                 or as1.get_owner(obj.as1) in protocol_user_ids):
            return

        error(f"Bridgy Fed doesn't support DMs", status=204)
1735

1736

1737
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Builds the object from the request and hands it to the ``receive`` method
    of the protocol class for its ``source_protocol``.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    A :class:`requests.RequestException` or :class:`ValueError` raised while
    receiving is reported via ``error(..., status=304)``.

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    form = request.form.to_dict()

    # activities authed as one of our own domains are internal
    authed_as = form.pop('authed_as', None)
    internal = (authed_as == common.PRIMARY_DOMAIN
                or authed_as in common.PROTOCOL_DOMAINS)

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    # parse the timestamp only if one was provided
    received_at = form.pop('received_at', None)
    if received_at:
        received_at = datetime.fromisoformat(received_at)

    try:
        return PROTOCOLS[obj.source_protocol].receive(
            obj=obj, authed_as=authed_as, internal=internal, received_at=received_at)
    except (RequestException, ValueError) as e:
        # RequestException takes precedence for exceptions that are both
        if isinstance(e, RequestException):
            util.interpret_http_exception(e)
        else:
            logger.warning(e, exc_info=True)
        error(e, status=304)
1781

1782

1783
@cloud_tasks_only(log=None)
def send_task():
    """Task handler that delivers one activity to one destination.

    Builds the object from the request and passes it to the ``send`` method
    of the destination protocol's class.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns:
      (str, int) tuple: empty body and status code. 200 if the send returned
      a truthy value, 204 if it returned ``False`` or if ``protocol`` or
      ``url`` is missing, 304 otherwise.
    """
    common.log_request()

    # prepare
    form = request.form.to_dict()
    url = form.get('url')
    protocol = form.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    # NOTE(review): target is never read below; kept as-is, presumably for
    # the validation that constructing a Target performs - confirm
    target = Target(uri=url, protocol=protocol)
    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    to_proto = PROTOCOLS[protocol]
    to_proto.check_supported(obj)
    allow_opt_out = (obj.type == 'delete')

    user = None
    user_key = form.get('user')
    if user_key:
        key = ndb.Key(urlsafe=user_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        user = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send
    # on the first attempt, log how far behind the object's creation we are
    delay = ''
    first_try = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if first_try and obj.created:
        behind = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(behind.total_seconds())
        delay = f'({delay_s} s behind)'

    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = PROTOCOLS[protocol].send(obj, url, from_user=user,
                                        orig_obj_id=form.get('orig_obj_id'))
    except BaseException as e:
        # only swallow exceptions that can be interpreted as HTTP errors
        code, body = util.interpret_http_exception(e)
        if not (code or body):
            raise

    if sent:
        return '', 200

    if sent is False:
        logger.info(f'Failed sending!')
        return '', 204

    return '', 304
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc