• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / a0c4e4a4-3804-4422-8536-bef478a4befb

10 Feb 2025 04:55AM UTC coverage: 93.223% (+0.002%) from 93.221%
a0c4e4a4-3804-4422-8536-bef478a4befb

push

circleci

snarfed
Protocol.targets bug fix for when target is blocked

fixes https://console.cloud.google.com/errors/detail/COu9uJiczoTrygE;locations=global;time=P30D?project=bridgy-federated

1 of 1 new or added line in 1 file covered. (100.0%)

37 existing lines in 1 file now uncovered.

4691 of 5032 relevant lines covered (93.22%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.09
/protocol.py
1
"""Base protocol class and common code."""
2
import copy
1✔
3
from datetime import datetime, timedelta, timezone
1✔
4
import logging
1✔
5
import os
1✔
6
import re
1✔
7
from threading import Lock
1✔
8
from urllib.parse import urljoin, urlparse
1✔
9

10
from cachetools import cached, LRUCache
1✔
11
from flask import request
1✔
12
from google.cloud import ndb
1✔
13
from google.cloud.ndb import OR
1✔
14
from google.cloud.ndb.model import _entity_to_protobuf
1✔
15
from granary import as1, as2, source
1✔
16
from granary.source import html_to_text
1✔
17
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
18
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
19
from oauth_dropins.webutil import models
1✔
20
from oauth_dropins.webutil import util
1✔
21
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
22
from requests import RequestException
1✔
23
import werkzeug.exceptions
1✔
24
from werkzeug.exceptions import BadGateway, HTTPException
1✔
25

26
import common
1✔
27
from common import (
1✔
28
    DOMAIN_BLOCKLIST,
29
    DOMAIN_RE,
30
    DOMAINS,
31
    PRIMARY_DOMAIN,
32
    PROTOCOL_DOMAINS,
33
    report_error,
34
    subdomain_wrap,
35
)
36
import dms
1✔
37
import ids
1✔
38
from ids import (
1✔
39
    BOT_ACTOR_AP_IDS,
40
    normalize_user_id,
41
    translate_object_id,
42
    translate_user_id,
43
)
44
import memcache
1✔
45
from models import (
1✔
46
    DM,
47
    Follower,
48
    Object,
49
    PROTOCOLS,
50
    PROTOCOLS_BY_KIND,
51
    Target,
52
    User,
53
)
54

55
OBJECT_REFRESH_AGE = timedelta(days=30)
1✔
56
DELETE_TASK_DELAY = timedelta(minutes=2)
1✔
57
CREATE_MAX_AGE = timedelta(weeks=2)
1✔
58

59
# require a follow for users on these domains before we deliver anything from
60
# them other than their profile
61
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
1✔
62
                   or util.load_file_lines('limited_domains'))
63

64
DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | set((
1✔
65
    'accept',
66
    'reject',
67
    'stop-following',
68
    'undo',
69
))
70
STORE_AS1_TYPES = (as1.ACTOR_TYPES | as1.POST_TYPES | as1.VERBS_WITH_OBJECT
1✔
71
                   - DONT_STORE_AS1_TYPES)
72

73
logger = logging.getLogger(__name__)
1✔
74

75

76
def error(*args, status=299, **kwargs):
1✔
77
    """Default HTTP status code to 299 to prevent retrying task."""
78
    return common.error(*args, status=status, **kwargs)
1✔
79

80

81
class ErrorButDoNotRetryTask(HTTPException):
1✔
82
    code = 299
1✔
83
    description = 'ErrorButDoNotRetryTask'
1✔
84

85
# https://github.com/pallets/flask/issues/1837#issuecomment-304996942
86
werkzeug.exceptions.default_exceptions.setdefault(299, ErrorButDoNotRetryTask)
1✔
87
werkzeug.exceptions._aborter.mapping.setdefault(299, ErrorButDoNotRetryTask)
1✔
88

89

90
def activity_id_memcache_key(id):
1✔
91
    return memcache.key(f'receive-{id}')
1✔
92

93

94
class Protocol:
1✔
95
    """Base protocol class. Not to be instantiated; classmethods only."""
96
    ABBREV = None
1✔
97
    """str: lower case abbreviation, used in URL paths"""
1✔
98
    PHRASE = None
1✔
99
    """str: human-readable name or phrase. Used in phrases like ``Follow this person on {PHRASE}``"""
1✔
100
    OTHER_LABELS = ()
1✔
101
    """sequence of str: label aliases"""
1✔
102
    LOGO_HTML = ''
1✔
103
    """str: logo emoji or ``<img>`` tag"""
1✔
104
    CONTENT_TYPE = None
1✔
105
    """str: MIME type of this protocol's native data format, appropriate for the ``Content-Type`` HTTP header."""
1✔
106
    HAS_COPIES = False
1✔
107
    """bool: whether this protocol is push and needs us to proactively create "copy" users and objects, as opposed to pulling converted objects on demand"""
1✔
108
    REQUIRES_AVATAR = False
1✔
109
    """bool: whether accounts on this protocol are required to have a profile picture. If they don't, their ``User.status`` will be ``blocked``."""
1✔
110
    REQUIRES_NAME = False
1✔
111
    """bool: whether accounts on this protocol are required to have a profile name that's different than their handle or id. If they don't, their ``User.status`` will be ``blocked``."""
1✔
112
    REQUIRES_OLD_ACCOUNT = False
1✔
113
    """bool: whether accounts on this protocol are required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their profile includes creation date and it's not old enough, their ``User.status`` will be ``blocked``."""
1✔
114
    DEFAULT_ENABLED_PROTOCOLS = ()
1✔
115
    """sequence of str: labels of other protocols that are automatically enabled for this protocol to bridge into"""
1✔
116
    DEFAULT_SERVE_USER_PAGES = False
1✔
117
    """bool: whether to serve user pages for all of this protocol's users on ``fed.brid.gy``. If ``False``, user pages will only be served for users who have explicitly opted in."""
1✔
118
    SUPPORTED_AS1_TYPES = ()
1✔
119
    """sequence of str: AS1 objectTypes and verbs that this protocol supports receiving and sending"""
1✔
120
    SUPPORTS_DMS = False
1✔
121
    """bool: whether this protocol can receive DMs (chat messages)"""
1✔
122

123
    def __init__(self):
1✔
124
        assert False
×
125

126
    @classmethod
1✔
127
    @property
1✔
128
    def LABEL(cls):
1✔
129
        """str: human-readable lower case name of this protocol, eg ``'activitypub'``"""
130
        return cls.__name__.lower()
1✔
131

132
    @staticmethod
1✔
133
    def for_request(fed=None):
1✔
134
        """Returns the protocol for the current request.
135

136
        ...based on the request's hostname.
137

138
        Args:
139
          fed (str or protocol.Protocol): protocol to return if the current
140
            request is on ``fed.brid.gy``
141

142
        Returns:
143
          Protocol: protocol, or None if the request hostname is not a
144
          subdomain of ``brid.gy`` or isn't a known protocol
145
        """
146
        return Protocol.for_bridgy_subdomain(request.host, fed=fed)
1✔
147

148
    @staticmethod
1✔
149
    def for_bridgy_subdomain(domain_or_url, fed=None):
1✔
150
        """Returns the protocol for a brid.gy subdomain.
151

152
        Args:
153
          domain_or_url (str)
154
          fed (str or protocol.Protocol): protocol to return if the current
155
            request is on ``fed.brid.gy``
156

157
        Returns:
158
          class: :class:`Protocol` subclass, or None if the provided domain or
159
          URL's domain is not a subdomain of ``brid.gy`` or isn't a known
160
          protocol
161
        """
162
        domain = (util.domain_from_link(domain_or_url, minimize=False)
1✔
163
                  if util.is_web(domain_or_url)
164
                  else domain_or_url)
165

166
        if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
1✔
167
            return PROTOCOLS[fed] if isinstance(fed, str) else fed
1✔
168
        elif domain and domain.endswith(common.SUPERDOMAIN):
1✔
169
            label = domain.removesuffix(common.SUPERDOMAIN)
1✔
170
            return PROTOCOLS.get(label)
1✔
171

172
    @classmethod
1✔
173
    def owns_id(cls, id):
1✔
174
        """Returns whether this protocol owns the id, or None if it's unclear.
175

176
        To be implemented by subclasses.
177

178
        IDs are string identities that uniquely identify users, and are intended
179
        primarily to be machine readable and usable. Compare to handles, which
180
        are human-chosen, human-meaningful, and often but not always unique.
181

182
        Some protocols' ids are more or less deterministic based on the id
183
        format, eg AT Protocol owns ``at://`` URIs. Others, like http(s) URLs,
184
        could be owned by eg Web or ActivityPub.
185

186
        This should be a quick guess without expensive side effects, eg no
187
        external HTTP fetches to fetch the id itself or otherwise perform
188
        discovery.
189

190
        Returns False if the id's domain is in :const:`common.DOMAIN_BLOCKLIST`.
191

192
        Args:
193
          id (str)
194

195
        Returns:
196
          bool or None:
197
        """
198
        return False
1✔
199

200
    @classmethod
1✔
201
    def owns_handle(cls, handle, allow_internal=False):
1✔
202
        """Returns whether this protocol owns the handle, or None if it's unclear.
203

204
        To be implemented by subclasses.
205

206
        Handles are string identities that are human-chosen, human-meaningful,
207
        and often but not always unique. Compare to IDs, which uniquely identify
208
        users, and are intended primarily to be machine readable and usable.
209

210
        Some protocols' handles are more or less deterministic based on the handle
211
        format, eg ActivityPub (technically WebFinger) handles are
212
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
213
        ActivityPub, AT Protocol, or others.
214

215
        This should be a quick guess without expensive side effects, eg no
216
        external HTTP fetches to fetch the id itself or otherwise perform
217
        discovery.
218

219
        Args:
220
          handle (str)
221
          allow_internal (bool): whether to return False for internal domains
222
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc
223

224
        Returns:
225
          bool or None
226
        """
227
        return False
1✔
228

229
    @classmethod
1✔
230
    def handle_to_id(cls, handle):
1✔
231
        """Converts a handle to an id.
232

233
        To be implemented by subclasses.
234

235
        May incur network requests, eg DNS queries or HTTP requests. Avoids
236
        blocked or opted out users.
237

238
        Args:
239
          handle (str)
240

241
        Returns:
242
          str: corresponding id, or None if the handle can't be found
243
        """
244
        raise NotImplementedError()
×
245

246
    @classmethod
1✔
247
    def key_for(cls, id, allow_opt_out=False):
1✔
248
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.
249

250
        To be implemented by subclasses. Canonicalizes the id if necessary.
251

252
        If called via ``Protocol.key_for``, infers the appropriate protocol with
253
        :meth:`for_id`. If called with a concrete subclass, uses that subclass
254
        as is.
255

256
        Args:
257
          id (str):
258
          allow_opt_out (bool): whether to allow users who are currently opted out
259

260
        Returns:
261
          google.cloud.ndb.Key: matching key, or None if the given id is not a
262
          valid :class:`User` id for this protocol.
263
        """
264
        if cls == Protocol:
1✔
265
            proto = Protocol.for_id(id)
1✔
266
            return proto.key_for(id, allow_opt_out=allow_opt_out) if proto else None
1✔
267

268
        # load user so that we follow use_instead
269
        existing = cls.get_by_id(id, allow_opt_out=True)
1✔
270
        if existing:
1✔
271
            if existing.status and not allow_opt_out:
1✔
272
                return None
1✔
273
            return existing.key
1✔
274

275
        return cls(id=id).key
1✔
276

277
    @staticmethod
1✔
278
    def _for_id_memcache_key(id, remote=None):
1✔
279
        """If ``remote`` is truthy and id is a URL, returns its domain, otherwise None.
280

281
        Args:
282
          id (str)
283

284
        Returns:
285
          str: domain, or None
286
        """
287
        if remote and util.is_web(id):
1✔
288
            return util.domain_from_link(id)
1✔
289

290
    @cached(LRUCache(20000), lock=Lock())
1✔
291
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote: remote,
1✔
292
                      version=3)
293
    @staticmethod
1✔
294
    def for_id(id, remote=True):
1✔
295
        """Returns the protocol for a given id.
296

297
        Args:
298
          id (str)
299
          remote (bool): whether to perform expensive side effects like fetching
300
            the id itself over the network, or other discovery.
301

302
        Returns:
303
          Protocol subclass: matching protocol, or None if no single known
304
          protocol definitively owns this id
305
        """
306
        logger.debug(f'Determining protocol for id {id}')
1✔
307
        if not id:
1✔
308
            return None
1✔
309

310
        # remove our synthetic id fragment, if any
311
        #
312
        # will this eventually cause false positives for other services that
313
        # include our full ids inside their own ids, non-URL-encoded? guess
314
        # we'll figure that out if/when it happens.
315
        id = id.partition('#bridgy-fed-')[0]
1✔
316
        if not id:
1✔
317
            return None
1✔
318

319
        if util.is_web(id):
1✔
320
            # step 1: check for our per-protocol subdomains
321
            try:
1✔
322
                is_homepage = urlparse(id).path.strip('/') == ''
1✔
323
            except ValueError as e:
1✔
324
                logger.info(f'urlparse ValueError: {e}')
1✔
325
                return None
1✔
326

327
            by_subdomain = Protocol.for_bridgy_subdomain(id)
1✔
328
            if by_subdomain and not is_homepage and id not in BOT_ACTOR_AP_IDS:
1✔
329
                logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
1✔
330
                return by_subdomain
1✔
331

332
        # step 2: check if any Protocols say conclusively that they own it
333
        # sort to be deterministic
334
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
1✔
335
                           key=lambda p: p.LABEL)
336
        candidates = []
1✔
337
        for protocol in protocols:
1✔
338
            owns = protocol.owns_id(id)
1✔
339
            if owns:
1✔
340
                logger.debug(f'  {protocol.LABEL} owns id {id}')
1✔
341
                return protocol
1✔
342
            elif owns is not False:
1✔
343
                candidates.append(protocol)
1✔
344

345
        if len(candidates) == 1:
1✔
346
            logger.debug(f'  {candidates[0].LABEL} owns id {id}')
1✔
347
            return candidates[0]
1✔
348

349
        # step 3: look for existing Objects in the datastore
350
        obj = Protocol.load(id, remote=False)
1✔
351
        if obj and obj.source_protocol:
1✔
352
            logger.debug(f'  {obj.key.id()} owned by source_protocol {obj.source_protocol}')
1✔
353
            return PROTOCOLS[obj.source_protocol]
1✔
354

355
        # step 4: fetch over the network, if necessary
356
        if not remote:
1✔
357
            return None
1✔
358

359
        for protocol in candidates:
1✔
360
            logger.debug(f'Trying {protocol.LABEL}')
1✔
361
            try:
1✔
362
                obj = protocol.load(id, local=False, remote=True)
1✔
363

364
                if protocol.ABBREV == 'web':
1✔
365
                    # for web, if we fetch and get HTML without microformats,
366
                    # load returns False but the object will be stored in the
367
                    # datastore with source_protocol web, and in cache. load it
368
                    # again manually to check for that.
369
                    obj = Object.get_by_id(id)
1✔
370
                    if obj and obj.source_protocol != 'web':
1✔
UNCOV
371
                        obj = None
×
372

373
                if obj:
1✔
374
                    logger.debug(f'  {protocol.LABEL} owns id {id}')
1✔
375
                    return protocol
1✔
376
            except BadGateway:
1✔
377
                # we tried and failed fetching the id over the network.
378
                # this depends on ActivityPub.fetch raising this!
379
                return None
1✔
380
            except HTTPException as e:
×
381
                # internal error we generated ourselves; try next protocol
382
                pass
×
383
            except Exception as e:
×
UNCOV
384
                code, _ = util.interpret_http_exception(e)
×
385
                if code:
×
386
                    # we tried and failed fetching the id over the network
UNCOV
387
                    return None
×
UNCOV
388
                raise
×
389

390
        logger.info(f'No matching protocol found for {id} !')
1✔
391
        return None
1✔
392

393
    @cached(LRUCache(20000), lock=Lock())
1✔
394
    @staticmethod
1✔
395
    def for_handle(handle):
1✔
396
        """Returns the protocol for a given handle.
397

398
        May incur expensive side effects like resolving the handle itself over
399
        the network or other discovery.
400

401
        Args:
402
          handle (str)
403

404
        Returns:
405
          (Protocol subclass, str) tuple: matching protocol and optional id (if
406
          resolved), or ``(None, None)`` if no known protocol owns this handle
407
        """
408
        # TODO: normalize, eg convert domains to lower case
409
        logger.debug(f'Determining protocol for handle {handle}')
1✔
410
        if not handle:
1✔
411
            return (None, None)
1✔
412

413
        # step 1: check if any Protocols say conclusively that they own it.
414
        # sort to be deterministic.
415
        protocols = sorted(set(p for p in PROTOCOLS.values() if p),
1✔
416
                           key=lambda p: p.LABEL)
417
        candidates = []
1✔
418
        for proto in protocols:
1✔
419
            owns = proto.owns_handle(handle)
1✔
420
            if owns:
1✔
421
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
1✔
422
                return (proto, None)
1✔
423
            elif owns is not False:
1✔
424
                candidates.append(proto)
1✔
425

426
        if len(candidates) == 1:
1✔
UNCOV
427
            logger.debug(f'  {candidates[0].LABEL} owns handle {handle}')
×
UNCOV
428
            return (candidates[0], None)
×
429

430
        # step 2: look for matching User in the datastore
431
        for proto in candidates:
1✔
432
            user = proto.query(proto.handle == handle).get()
1✔
433
            if user:
1✔
434
                if user.status:
1✔
435
                    return (None, None)
1✔
436
                logger.debug(f'  user {user.key} handle {handle}')
1✔
437
                return (proto, user.key.id())
1✔
438

439
        # step 3: resolve handle to id
440
        for proto in candidates:
1✔
441
            id = proto.handle_to_id(handle)
1✔
442
            if id:
1✔
443
                logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {id}')
1✔
444
                return (proto, id)
1✔
445

446
        logger.info(f'No matching protocol found for handle {handle} !')
1✔
447
        return (None, None)
1✔
448

449
    @classmethod
1✔
450
    def bridged_web_url_for(cls, user, fallback=False):
1✔
451
        """Returns the web URL for a user's bridged profile in this protocol.
452

453
        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
454
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``
455

456
        Args:
457
          user (models.User)
458
          fallback (bool): if True, and bridged users have no canonical user
459
            profile URL in this protocol, return the native protocol's profile URL
460

461
        Returns:
462
          str, or None if there isn't a canonical URL
463
        """
464
        if fallback:
1✔
465
            return user.web_url()
1✔
466

467
    @classmethod
1✔
468
    def actor_key(cls, obj, allow_opt_out=False):
1✔
469
        """Returns the :class:`User` key for a given object's author or actor.
470

471
        Args:
472
          obj (models.Object)
473
          allow_opt_out (bool): whether to return a user key if they're opted out
474

475
        Returns:
476
          google.cloud.ndb.key.Key or None:
477
        """
478
        owner = as1.get_owner(obj.as1)
1✔
479
        if owner:
1✔
480
            return cls.key_for(owner, allow_opt_out=allow_opt_out)
1✔
481

482
    @classmethod
1✔
483
    def bot_user_id(cls):
1✔
484
        """Returns the Web user id for the bot user for this protocol.
485

486
        For example, ``'bsky.brid.gy'`` for ATProto.
487

488
        Returns:
489
          str:
490
        """
491
        return f'{cls.ABBREV}{common.SUPERDOMAIN}'
1✔
492

493
    @classmethod
1✔
494
    def create_for(cls, user):
1✔
495
        """Creates or re-activates a copy user in this protocol.
496

497
        Should add the copy user to :attr:`copies`.
498

499
        If the copy user already exists and is active, should do nothing.
500

501
        Args:
502
          user (models.User): original source user. Shouldn't already have a
503
            copy user for this protocol in :attr:`copies`.
504

505
        Raises:
506
          ValueError: if we can't create a copy of the given user in this protocol
507
        """
UNCOV
508
        raise NotImplementedError()
×
509

510
    @classmethod
1✔
511
    def send(to_cls, obj, url, from_user=None, orig_obj_id=None):
1✔
512
        """Sends an outgoing activity.
513

514
        To be implemented by subclasses.
515

516
        NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates
517
        a copy and sends it, it *must* add that copy to the *object*'s (not
518
        activity's) :attr:`copies`!
519

520
        Args:
521
          obj (models.Object): with activity to send
522
          url (str): destination URL to send to
523
          from_user (models.User): user (actor) this activity is from
524
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
525
            that this object refers to, eg replies to or reposts or likes
526

527
        Returns:
528
          bool: True if the activity is sent successfully, False if it is
529
          ignored or otherwise unsent due to protocol logic, eg no webmention
530
          endpoint, protocol doesn't support the activity type. (Failures are
531
          raised as exceptions.)
532

533
        Raises:
534
          werkzeug.HTTPException: if the request fails
535
        """
UNCOV
536
        raise NotImplementedError()
×
537

538
    @classmethod
1✔
539
    def fetch(cls, obj, **kwargs):
1✔
540
        """Fetches a protocol-specific object and populates it in an :class:`Object`.
541

542
        Errors are raised as exceptions. If this method returns False, the fetch
543
        didn't fail but didn't succeed either, eg the id isn't valid for this
544
        protocol, or the fetch didn't return valid data for this protocol.
545

546
        To be implemented by subclasses.
547

548
        Args:
549
          obj (models.Object): with the id to fetch. Data is filled into one of
550
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
551
          kwargs: subclass-specific
552

553
        Returns:
554
          bool: True if the object was fetched and populated successfully,
555
          False otherwise
556

557
        Raises:
558
          requests.RequestException or werkzeug.HTTPException: if the fetch fails
559
        """
UNCOV
560
        raise NotImplementedError()
×
561

562
    @classmethod
1✔
563
    def convert(cls, obj, from_user=None, **kwargs):
1✔
564
        """Converts an :class:`Object` to this protocol's data format.
565

566
        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
567
        and ``application/activity+json`` for :class:`ActivityPub`.
568

569
        Just passes through to :meth:`_convert`, then does minor
570
        protocol-independent postprocessing.
571

572
        Args:
573
          obj (models.Object):
574
          from_user (models.User): user (actor) this activity/object is from
575
          kwargs: protocol-specific, passed through to :meth:`_convert`
576

577
        Returns:
578
          converted object in the protocol's native format, often a dict
579
        """
580
        if not obj or not obj.as1:
1✔
581
            return {}
1✔
582

583
        id = obj.key.id() if obj.key else obj.as1.get('id')
1✔
584
        is_activity = obj.as1.get('verb') in ('post', 'update')
1✔
585
        base_obj = as1.get_object(obj.as1) if is_activity else obj.as1
1✔
586
        orig_our_as1 = obj.our_as1
1✔
587

588
        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
589
        if (from_user and base_obj
1✔
590
            and base_obj.get('objectType') in as1.ACTOR_TYPES
591
            and PROTOCOLS.get(obj.source_protocol) != cls
592
            and Protocol.for_bridgy_subdomain(id) not in DOMAINS
593
            # Web users are special cased, they don't get the label if they've
594
            # explicitly enabled Bridgy Fed with redirects or webmentions
595
            and not (from_user.LABEL == 'web'
596
                     and (from_user.last_webmention_in or from_user.has_redirects))):
597

598
            obj.our_as1 = copy.deepcopy(obj.as1)
1✔
599
            actor = as1.get_object(obj.as1) if is_activity else obj.as1
1✔
600
            actor['objectType'] = 'person'
1✔
601
            cls.add_source_links(actor=actor, obj=obj, from_user=from_user)
1✔
602

603
        converted = cls._convert(obj, from_user=from_user, **kwargs)
1✔
604
        obj.our_as1 = orig_our_as1
1✔
605
        return converted
1✔
606

607
    @classmethod
1✔
608
    def _convert(cls, obj, from_user=None, **kwargs):
1✔
609
        """Converts an :class:`Object` to this protocol's data format.
610

611
        To be implemented by subclasses. Implementations should generally call
612
        :meth:`Protocol.translate_ids` (as their own class) before converting to
613
        their format.
614

615
        Args:
616
          obj (models.Object):
617
          from_user (models.User): user (actor) this activity/object is from
618
          kwargs: protocol-specific
619

620
        Returns:
621
          converted object in the protocol's native format, often a dict. May
622
            return the ``{}`` empty dict if the object can't be converted.
623
        """
UNCOV
624
        raise NotImplementedError()
×
625

626
    @classmethod
1✔
627
    def add_source_links(cls, actor, obj, from_user):
1✔
628
        """Adds "bridged from ... by Bridgy Fed" HTML to ``actor['summary']``.
629

630
        Default implementation; subclasses may override.
631

632
        Args:
633
          actor (dict): AS1 actor
634
          obj (models.Object):
635
          from_user (models.User): user (actor) this activity/object is from
636
        """
637
        assert from_user
1✔
638
        summary = actor.setdefault('summary', '')
1✔
639
        if 'Bridgy Fed]' in html_to_text(summary, ignore_links=True):
1✔
640
            return
1✔
641

642
        id = actor.get('id')
1✔
643
        proto_phrase = (PROTOCOLS[obj.source_protocol].PHRASE
1✔
644
                        if obj.source_protocol else '')
645
        if proto_phrase:
1✔
646
            proto_phrase = f' on {proto_phrase}'
1✔
647

648
        if from_user.key and id in (from_user.key.id(), from_user.profile_id()):
1✔
649
            source_links = f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a> from <a href="{from_user.web_url()}">{from_user.handle}</a>{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
1✔
650

651
        else:
652
            url = as1.get_url(actor) or id
1✔
653
            source = util.pretty_link(url) if url else '?'
1✔
654
            source_links = f'[bridged from {source}{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
1✔
655

656
        if summary:
1✔
657
            summary += '<br><br>'
1✔
658
        actor['summary'] = summary + source_links
1✔
659

660
    @classmethod
1✔
661
    def set_username(to_cls, user, username):
1✔
662
        """Sets a custom username for a user's bridged account in this protocol.
663

664
        Args:
665
          user (models.User)
666
          username (str)
667

668
        Raises:
669
          ValueError: if the username is invalid
670
          RuntimeError: if the username could not be set
671
        """
672
        raise NotImplementedError()
1✔
673

674
    @classmethod
1✔
675
    def target_for(cls, obj, shared=False):
1✔
676
        """Returns an :class:`Object`'s delivery target (endpoint).
677

678
        To be implemented by subclasses.
679

680
        Examples:
681

682
        * If obj has ``source_protocol`` ``web``, returns its URL, as a
683
          webmention target.
684
        * If obj is an ``activitypub`` actor, returns its inbox.
685
        * If obj is an ``activitypub`` object, returns its author's or actor's
686
          inbox.
687

688
        Args:
689
          obj (models.Object):
690
          shared (bool): optional. If True, returns a common/shared
691
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
692
            multiple recipients for efficiency
693

694
        Returns:
695
          str: target endpoint, or None if not available.
696
        """
UNCOV
697
        raise NotImplementedError()
×
698

699
    @classmethod
1✔
700
    def is_blocklisted(cls, url, allow_internal=False):
1✔
701
        """Returns True if we block the given URL and shouldn't deliver to it.
702

703
        Default implementation here, subclasses may override.
704

705
        Args:
706
          url (str):
707
          allow_internal (bool): whether to return False for internal domains
708
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc
709
        """
710
        blocklist = DOMAIN_BLOCKLIST
1✔
711
        if not allow_internal:
1✔
712
            blocklist += DOMAINS
1✔
713
        return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
1✔
714

715
    @classmethod
1✔
716
    def translate_ids(to_cls, obj):
1✔
717
        """Translates all ids in an AS1 object to a specific protocol.
718

719
        Infers source protocol for each id value separately.
720

721
        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
722
        ``at://did:plc:abc/coll/123`` will be converted to
723
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.
724

725
        Wraps these AS1 fields:
726

727
        * ``id``
728
        * ``actor``
729
        * ``author``
730
        * ``bcc``
731
        * ``bto``
732
        * ``cc``
733
        * ``object``
734
        * ``object.actor``
735
        * ``object.author``
736
        * ``object.id``
737
        * ``object.inReplyTo``
738
        * ``object.object``
739
        * ``attachments[].id``
740
        * ``tags[objectType=mention].url``
741
        * ``to``
742

743
        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
744
        same logic is duplicated there!
745

746
        TODO: unify with :meth:`Object.resolve_ids`,
747
        :meth:`models.Object.normalize_ids`.
748

749
        Args:
750
          to_cls (Protocol subclass)
751
          obj (dict): AS1 object or activity (not :class:`models.Object`!)
752

753
        Returns:
754
          dict: wrapped AS1 version of ``obj``
755
        """
756
        assert to_cls != Protocol
1✔
757
        if not obj:
1✔
758
            return obj
1✔
759

760
        outer_obj = copy.deepcopy(obj)
1✔
761
        inner_objs = outer_obj['object'] = as1.get_objects(outer_obj)
1✔
762

763
        def translate(elem, field, fn, uri=False):
1✔
764
            elem[field] = as1.get_objects(elem, field)
1✔
765
            for obj in elem[field]:
1✔
766
                if id := obj.get('id'):
1✔
767
                    if field in ('to', 'cc', 'bcc', 'bto') and as1.is_audience(id):
1✔
768
                        continue
1✔
769
                    from_cls = Protocol.for_id(id)
1✔
770
                    # TODO: what if from_cls is None? relax translate_object_id,
771
                    # make it a noop if we don't know enough about from/to?
772
                    if from_cls and from_cls != to_cls:
1✔
773
                        obj['id'] = fn(id=id, from_=from_cls, to=to_cls)
1✔
774
                    if obj['id'] and uri:
1✔
775
                        obj['id'] = to_cls(id=obj['id']).id_uri()
1✔
776

777
            elem[field] = [o['id'] if o.keys() == {'id'} else o
1✔
778
                           for o in elem[field]]
779

780
            if len(elem[field]) == 1:
1✔
781
                elem[field] = elem[field][0]
1✔
782

783
        type = as1.object_type(outer_obj)
1✔
784
        translate(outer_obj, 'id',
1✔
785
                  translate_user_id if type in as1.ACTOR_TYPES
786
                  else translate_object_id)
787

788
        for o in inner_objs:
1✔
789
            is_actor = (as1.object_type(o) in as1.ACTOR_TYPES
1✔
790
                        or as1.get_owner(outer_obj) == o.get('id')
791
                        or type in ('follow', 'stop-following'))
792
            translate(o, 'id', translate_user_id if is_actor else translate_object_id)
1✔
793
            obj_is_actor = o.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT
1✔
794
            translate(o, 'object', translate_user_id if obj_is_actor
1✔
795
                      else translate_object_id)
796

797
        for o in [outer_obj] + inner_objs:
1✔
798
            translate(o, 'inReplyTo', translate_object_id)
1✔
799
            for field in 'actor', 'author', 'to', 'cc', 'bto', 'bcc':
1✔
800
                translate(o, field, translate_user_id)
1✔
801
            for tag in as1.get_objects(o, 'tags'):
1✔
802
                if tag.get('objectType') == 'mention':
1✔
803
                    translate(tag, 'url', translate_user_id, uri=True)
1✔
804
            for att in as1.get_objects(o, 'attachments'):
1✔
805
                translate(att, 'id', translate_object_id)
1✔
806
                url = att.get('url')
1✔
807
                if url and not att.get('id'):
1✔
808
                    if from_cls := Protocol.for_id(url):
1✔
809
                        att['id'] = translate_object_id(from_=from_cls, to=to_cls,
1✔
810
                                                        id=url)
811

812
        outer_obj = util.trim_nulls(outer_obj)
1✔
813

814
        if objs := util.get_list(outer_obj ,'object'):
1✔
815
            outer_obj['object'] = [o['id'] if o.keys() == {'id'} else o for o in objs]
1✔
816
            if len(outer_obj['object']) == 1:
1✔
817
                outer_obj['object'] = outer_obj['object'][0]
1✔
818

819
        return outer_obj
1✔
820

821
    @classmethod
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
        """Handles an incoming activity.

        Validates the activity, checks that it's authorized, stores it,
        applies its side effects (follows, unfollows, deletes, blocks, DMs),
        and finally hands it to :meth:`deliver`.

        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
        unset, the request is rejected via ``error``.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity.
            Required, despite the default.
          internal (bool): whether to allow activity ids on internal domains,
            from opted out/blocked users, etc.
          received_at (datetime): when we first saw (received) this activity.
            Right now only used for monitoring.

        Returns:
          (str, int) tuple: (response body, HTTP status code) Flask response

        Raises:
          werkzeug.HTTPException: if the request is invalid
        """
        # check some invariants
        assert from_cls != Protocol
        assert isinstance(obj, Object), obj

        if not obj.as1:
            error('No object data provided')

        # determine the activity id: prefer the Object's key, fall back to the
        # AS1 id
        id = None
        if obj.key and obj.key.id():
            id = obj.key.id()

        if not id:
            id = obj.as1.get('id')
            obj.key = ndb.Key(Object, id)

        if not id:
            error('No id provided')
        elif from_cls.owns_id(id) is False:
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
        elif from_cls.is_blocklisted(id, allow_internal=internal):
            error(f'Activity {id} is blocklisted')
        # check that this activity is public. only do this for some activities,
        # not eg likes or follows, since Mastodon doesn't currently mark those
        # as explicitly public.
        elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
                  and not as1.is_public(obj.as1, unlisted=False)
                  and not as1.is_dm(obj.as1)):
            logger.info('Dropping non-public activity')
            return ('OK', 200)

        # lease this object, atomically
        memcache_key = activity_id_memcache_key(id)
        leased = memcache.memcache.add(memcache_key, 'leased', noreply=False,
                                     expire=5 * 60)  # 5 min
        # short circuit if we've already seen this activity id.
        # (don't do this for bare objects since we need to check further down
        # whether they've been updated since we saw them last.)
        if (obj.as1.get('objectType') == 'activity'
            and 'force' not in request.values
            and (not leased
                 or (obj.new is False and obj.changed is False))):
            error(f'Already seen this activity {id}', status=204)

        # log the activity, minus a few bulky fields
        pruned = {k: v for k, v in obj.as1.items()
                  if k not in ('contentMap', 'replies', 'signature')}
        delay = ''
        if (received_at and request.headers.get('X-AppEngine-TaskRetryCount') == '0'
                and obj.type != 'delete'):  # we delay deletes for 2m
            # compare as naive datetimes so that aware and naive values can
            # be subtracted from each other
            delay_s = int((util.now().replace(tzinfo=None)
                           - received_at.replace(tzinfo=None)
                           ).total_seconds())
            delay = f'({delay_s} s behind)'
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')

        # does this protocol support this activity/object type?
        from_cls.check_supported(obj)

        # check authorization
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
        actor = as1.get_owner(obj.as1)
        if not actor:
            error('Activity missing actor or author')
        elif from_cls.owns_id(actor) is False:
            error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)

        assert authed_as
        assert isinstance(authed_as, str)
        authed_as = normalize_user_id(id=authed_as, proto=from_cls)
        actor = normalize_user_id(id=actor, proto=from_cls)
        if actor != authed_as:
            report_error("Auth: receive: authed_as doesn't match owner",
                         user=f'{id} authed_as {authed_as} owner {actor}')
            error(f"actor {actor} isn't authed user {authed_as}")

        # update copy ids to originals
        obj.normalize_ids()
        obj.resolve_ids()

        if (obj.type == 'follow'
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
            # follows of bot user; refresh user profile first
            logger.info(f'Follow of bot user, reloading {actor}')
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
            from_user.reload_profile()
        else:
            # load actor user
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=internal)

        if not internal and (not from_user or from_user.manual_opt_out):
            error(f'Actor {actor} is manually opted out', status=204)

        # if this is an object, ie not an activity, wrap it in a create or update
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
        obj.add('users', from_user.key)

        inner_obj_as1 = as1.get_object(obj.as1)
        inner_obj_id = inner_obj_as1.get('id')
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
            if not inner_obj_id:
                error(f'{obj.type} object has no id!')

        # check age. we support backdated posts, but if they're over 2w old, we
        # don't deliver them
        if obj.type == 'post':
            if published := inner_obj_as1.get('published'):
                try:
                    published_dt = util.parse_iso8601(published)
                    if not published_dt.tzinfo:
                        published_dt = published_dt.replace(tzinfo=timezone.utc)
                    age = util.now() - published_dt
                    if age > CREATE_MAX_AGE:
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
                              status=204)
                except ValueError:  # from parse_iso8601
                    logger.debug(f"Couldn't parse published {published}")

        # write Object to datastore
        obj.source_protocol = from_cls.LABEL
        if obj.type in STORE_AS1_TYPES:
            obj.put()

        # store inner object
        # TODO: unify with big obj.type conditional below. would have to merge
        # this with the DM handling block lower down.
        crud_obj = None
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
            crud_obj = Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
                                            source_protocol=from_cls.LABEL,
                                            authed_as=actor, users=[from_user.key])

        # from here on, actor is the activity's actor *object* (dict), not
        # the normalized actor id string used above
        actor = as1.get_object(obj.as1, 'actor')
        actor_id = actor.get('id')

        # handle activity!
        if obj.type == 'stop-following':
            # TODO: unify with handle_follow?
            # TODO: handle multiple followees
            if not actor_id or not inner_obj_id:
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')

            # deactivate Follower
            from_ = from_cls.key_for(actor_id)
            to_cls = Protocol.for_id(inner_obj_id)
            to = to_cls.key_for(inner_obj_id)
            follower = Follower.query(Follower.to == to,
                                      Follower.from_ == from_,
                                      Follower.status == 'active').get()
            if follower:
                logger.info(f'Marking {follower} inactive')
                follower.status = 'inactive'
                follower.put()
            else:
                logger.warning(f'No Follower found for {from_} => {to}')

            # fall through to deliver to followee
            # TODO: do we convert stop-following to webmention 410 of original
            # follow?

            # fall through to deliver to followers

        elif obj.type in ('delete', 'undo'):
            # deleting the user itself means deleting its profile object
            delete_obj_id = (from_user.profile_id()
                            if inner_obj_id == from_user.key.id()
                            else inner_obj_id)

            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
            if not delete_obj:
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
                return 'OK', 204

            # TODO: just delete altogether!
            logger.info(f'Marking Object {delete_obj_id} deleted')
            delete_obj.deleted = True
            delete_obj.put()

            # if this is an actor, handle deleting it later so that
            # in case it's from_user, user.enabled_protocols is still populated
            #
            # fall through to deliver to followers and delete copy if necessary.
            # should happen via protocol-specific copy target and send of
            # delete activity.
            # https://github.com/snarfed/bridgy-fed/issues/63

        elif obj.type == 'block':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # blocking protocol bot user disables that protocol
                from_user.delete(proto)
                from_user.disable_protocol(proto)
                return 'OK', 200

        elif obj.type == 'post':
            # handle DMs to bot users
            if as1.is_dm(obj.as1):
                return dms.receive(from_user=from_user, obj=obj)

        # fetch actor if necessary
        if (actor and actor.keys() == set(['id'])
                and obj.type not in ('delete', 'undo')):
            logger.debug('Fetching actor so we have name, profile photo, etc')
            actor_obj = from_cls.load(ids.profile_id(id=actor['id'], proto=from_cls),
                                      raise_=False)
            if actor_obj and actor_obj.as1:
                obj.our_as1 = {
                    **obj.as1, 'actor': {
                        **actor_obj.as1,
                        # override profile id with actor id
                        # https://github.com/snarfed/bridgy-fed/issues/1720
                        'id': actor['id'],
                    }
                }

        # fetch object if necessary
        if (obj.type in ('post', 'update', 'share')
                and inner_obj_as1.keys() == set(['id'])
                and from_cls.owns_id(inner_obj_id)):
            logger.debug('Fetching inner object')
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
                                      remote=(obj.type in ('post', 'update')))
            if obj.type in ('post', 'update'):
                crud_obj = inner_obj
            if inner_obj and inner_obj.as1:
                obj.our_as1 = {
                    **obj.as1,
                    'object': {
                        **inner_obj_as1,
                        **inner_obj.as1,
                    }
                }
            elif obj.type in ('post', 'update'):
                # f-string so that the object id actually appears in the message
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")

        if obj.type == 'follow':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # follow of one of our protocol bot users; enable that protocol.
                # fall through so that we send an accept.
                try:
                    from_user.enable_protocol(proto)
                except ErrorButDoNotRetryTask:
                    # couldn't enable; reject the follow, then propagate
                    from web import Web
                    bot = Web.get_by_id(proto.bot_user_id())
                    from_cls.respond_to_follow('reject', follower=from_user,
                                               followee=bot, follow=obj)
                    raise
                proto.bot_follow(from_user)

            from_cls.handle_follow(obj)

        # deliver to targets
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)

        # if this is a user, deactivate its followers/followings
        # https://github.com/snarfed/bridgy-fed/issues/1304
        if obj.type == 'delete':
            if user_key := from_cls.key_for(id=inner_obj_id):
                if user := user_key.get():
                    for proto in user.enabled_protocols:
                        user.disable_protocol(PROTOCOLS[proto])

                    logger.info(f'Deactivating Followers from or to {user_key.id()}')
                    followers = Follower.query(
                        OR(Follower.to == user_key, Follower.from_ == user_key)
                        ).fetch()
                    for f in followers:
                        f.status = 'inactive'
                    ndb.put_multi(followers)

        # replace the short-lived lease with a longer-lived "done" marker
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
        return resp
1✔
1110

1111
    @classmethod
    def handle_follow(from_cls, obj):
        """Handles an incoming follow activity.

        Loads the follower and each followee, stores a :class:`Follower` for
        each cross-protocol followee, and sends an ``Accept`` back. Doesn't
        send the ``Follow`` itself. That happens in :meth:`deliver`.

        Followees on the same protocol as the follower, and followees without
        a valid user key, are skipped.

        Args:
          obj (models.Object): follow activity
        """
        logger.debug('Got follow. Loading users, storing Follow(s), sending accept(s)')

        # Prepare follower (from) users' data
        # TODO: remove all of this and just use from_user
        from_as1 = as1.get_object(obj.as1, 'actor')
        from_id = from_as1.get('id')
        if not from_id:
            error(f'Follow activity requires actor. Got: {obj.as1}')

        from_obj = from_cls.load(from_id, raise_=False)
        if not from_obj:
            error(f"Couldn't load {from_id}", status=502)

        if not from_obj.as1:
            # nothing stored for the follower yet; use the actor from the follow
            from_obj.our_as1 = from_as1
            from_obj.put()

        from_key = from_cls.key_for(from_id)
        if not from_key:
            error(f'Invalid {from_cls.LABEL} user key: {from_id}')
        obj.users = [from_key]
        from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls:
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_obj = to_cls.load(to_id)
            if to_obj and not to_obj.as1:
                to_obj.our_as1 = to_as1
                to_obj.put()

            to_key = to_cls.key_for(to_id)
            if not to_key:
                # report the followee's protocol and id, since that's the key
                # that's invalid here
                logger.info(f'Skipping invalid {to_cls.LABEL} user key: {to_id}')
                continue

            to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj,
                                           allow_opt_out=True)
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')
            obj.add('notify', to_key)
            from_cls.respond_to_follow('accept', follower=from_user,
                                       followee=to_user, follow=obj)
1181

1182
    @classmethod
    def respond_to_follow(_, verb, follower, followee, follow):
        """Enqueues an accept or reject activity in response to a follow.

        Does nothing if ``verb`` isn't in the follower's
        ``SUPPORTED_AS1_TYPES``. The response is sent from the followee to the
        follower's delivery target, via a task on the ``send`` queue.

        Args:
          verb (str): ``accept`` or  ``reject``
          follower (models.User)
          followee (models.User)
          follow (models.Object)
        """
        assert verb in ('accept', 'reject')

        supported = verb in follower.SUPPORTED_AS1_TYPES
        if not supported:
            return

        target = follower.target_for(follower.obj)
        if not target:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # send. note that this is one response for the whole follow, even if it
        # has multiple followees!
        followee_id = followee.key.id()
        response_id = f'{followee_id}/followers#{verb}-{follow.key.id()}'
        response_as1 = {
            'id': response_id,
            'objectType': 'activity',
            'verb': verb,
            'actor': followee_id,
            'object': follow.as1,
        }
        common.create_task(
            queue='send',
            id=response_id,
            our_as1=response_as1,
            url=target,
            protocol=follower.LABEL,
            user=followee.key.urlsafe(),
        )
1215

1216
    @classmethod
    def bot_follow(bot_cls, user):
        """Enqueues a follow of a user from this protocol's bot user.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub).

        Does nothing, apart from logging, if the user has no profile object.

        Args:
          user (User)
        """
        from web import Web

        bot = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()
        logger.info(f'Following {user.key.id()} back from bot user {bot.key.id()}')

        profile = user.obj
        if not profile:
            logger.info("  can't follow, user has no profile obj")
            return

        target = user.target_for(user.obj)
        bot_id = bot.key.id()
        user_id = user.key.id()
        # the timestamp is part of the id, so each follow-back gets its own id
        follow_id = f'https://{bot_id}/#follow-back-{user_id}-{timestamp}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_id,
            'actor': bot_id,
            'object': user_id,
        }
        common.create_task(
            queue='send',
            id=follow_id,
            our_as1=follow_as1,
            url=target,
            source_protocol='web',
            protocol=user.LABEL,
            user=bot.key.urlsafe(),
        )
1248

1249
    @classmethod
    def handle_bare_object(cls, obj, authed_as=None):
        """Wraps a bare object in a synthetic create or update activity.

        Activities, and any types other than actors, notes, articles, and
        comments, are returned unchanged. Actors and changed objects are
        wrapped in an ``update``; new objects (or any object when the request
        form has ``force``) are wrapped in a ``post``. Anything else is
        reported as unchanged via ``error`` with status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        owner_id = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        timestamp = util.now().isoformat()

        # occasionally we override the object, eg if this is a profile object
        # coming in via a user with use_instead set
        inner_as1 = obj.as1
        key_id = obj.key.id()
        as1_id = inner_as1.get('id') if key_id else None
        if as1_id and as1_id != key_id:
            logger.info(f'Overriding AS1 object id {as1_id} with Object id {key_id}')
            inner_as1['id'] = key_id

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            if obj.changed:
                logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')
            else:
                logger.info('Got actor profile object, wrapping in update')

            # Mastodon requires the updated field for Updates, so add a
            # default value, which the object's own value overrides if set.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            updated_object = {'updated': timestamp, **inner_as1}

            update_id = f'{obj.key.id()}#bridgy-fed-update-{timestamp}'
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': owner_id,
                'object': updated_object,
            }
            logger.debug(f'  AS1: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        # HACK: force query param here is specific to webmention
        if obj.new or 'force' in request.form:
            create_id = f'{obj.key.id()}#bridgy-fed-create'
            create_as1 = {
                'objectType': 'activity',
                'verb': 'post',
                'id': create_id,
                'actor': owner_id,
                'object': inner_as1,
                'published': timestamp,
            }
            logger.info('Wrapping in post')
            logger.debug(f'  AS1: {json_dumps(create_as1, indent=2)}')
            return Object(id=create_id, our_as1=create_as1,
                          source_protocol=obj.source_protocol)

        error(f'{obj.key.id()} is unchanged, nothing to do', status=204)
1✔
1322

1323
    @classmethod
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Enqueues ``send`` tasks to deliver an activity to its recipients.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response. 204 if there are no targets,
          otherwise 202.
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # find delivery targets. maps Target to Object or None
        #
        # ...then write the relevant object, since targets() has a side effect of
        # setting the notify and feed properties (and dirty attribute)
        targets = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if not targets:
            return r'No targets, nothing to do ¯\_(ツ)_/¯', 204

        is_stored_type = obj.type in STORE_AS1_TYPES

        # store object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif is_stored_type and obj.dirty:
            obj.put()

        # stored activities are passed to the send task by id, others inline
        if is_stored_type:
            obj_params = {'obj_id': obj.key.id()}
        else:
            obj_params = obj.to_request()

        # sort targets so order is deterministic for tests, debugging, etc
        ordered = list(targets.items())
        ordered.sort(key=lambda pair: pair[0].uri)

        # enqueue send task for each targets
        ordered_targets = [target for target, _ in ordered]
        logger.info(f'Delivering to: {ordered_targets}')
        user = from_user.key.urlsafe()
        for target, orig_obj in ordered:
            if to_proto and target.protocol != to_proto.LABEL:
                continue

            orig_obj_id = None
            if orig_obj:
                orig_obj_id = orig_obj.key.id()

            common.create_task(queue='send', url=target.uri,
                               protocol=target.protocol,
                               orig_obj_id=orig_obj_id, user=user, **obj_params)

        return 'OK', 202
1✔
1373

1374
    @classmethod
1✔
1375
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1376
        """Collects the targets to send a :class:`models.Object` to.
1377

1378
        Targets are both objects - original posts, events, etc - and actors.
1379

1380
        Args:
1381
          obj (models.Object)
1382
          from_user (User)
1383
          crud_obj (models.Object): if this is a create, update, or delete/undo
1384
            activity, the inner object that's being written, otherwise None.
1385
            (This object's ``notify`` and ``feed`` properties may be updated.)
1386
          internal (bool): whether this is a recursive internal call
1387

1388
        Returns:
1389
          dict: maps :class:`models.Target` to original (in response to)
1390
          :class:`models.Object`, if any, otherwise None
1391
        """
1392
        logger.debug('Finding recipients and their targets')
1✔
1393

1394
        # we should only have crud_obj iff this is a create or update
1395
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1396
        write_obj = crud_obj or obj
1✔
1397
        write_obj.dirty = False
1✔
1398

1399
        target_uris = sorted(set(as1.targets(obj.as1)))
1✔
1400
        logger.info(f'Raw targets: {target_uris}')
1✔
1401
        orig_obj = None
1✔
1402
        targets = {}  # maps Target to Object or None
1✔
1403
        owner = as1.get_owner(obj.as1)
1✔
1404
        allow_opt_out = (obj.type == 'delete')
1✔
1405
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1406
        inner_obj_id = inner_obj_as1.get('id')
1✔
1407
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1408
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1409
        is_self_reply = False
1✔
1410

1411
        original_ids = []
1✔
1412
        if is_reply:
1✔
1413
            original_ids = in_reply_tos
1✔
1414
        elif inner_obj_id:
1✔
1415
            if inner_obj_id == from_user.key.id():
1✔
1416
                inner_obj_id = from_user.profile_id()
1✔
1417
            original_ids = [inner_obj_id]
1✔
1418

1419
        # which protocols should we allow delivering to?
1420
        to_protocols = []
1✔
1421
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1422
                      + from_user.enabled_protocols):
1423
            proto = PROTOCOLS[label]
1✔
1424
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1425
                                     or is_reply):
1426
                for id in original_ids:
1✔
1427
                    if Protocol.for_id(id) == proto:
1✔
1428
                        logger.info(f'Allowing {label} for original post {id}')
1✔
1429
                        break
1✔
1430
                    elif orig := from_user.load(id, remote=False):
1✔
1431
                        if orig.get_copy(proto):
1✔
1432
                            logger.info(f'Allowing {label}, original post {id} was bridged there')
1✔
1433
                            break
1✔
1434
                else:
1435
                    logger.info(f"Skipping {label}, original objects {original_ids} weren't bridged there")
1✔
1436
                    continue
1✔
1437

1438
            util.add(to_protocols, proto)
1✔
1439

1440
        # process direct targets
1441
        for id in sorted(target_uris):
1✔
1442
            target_proto = Protocol.for_id(id)
1✔
1443
            if not target_proto:
1✔
1444
                logger.info(f"Can't determine protocol for {id}")
1✔
1445
                continue
1✔
1446
            elif target_proto.is_blocklisted(id):
1✔
1447
                logger.debug(f'{id} is blocklisted')
1✔
1448
                continue
1✔
1449

1450
            orig_obj = target_proto.load(id, raise_=False)
1✔
1451
            if not orig_obj or not orig_obj.as1:
1✔
1452
                logger.info(f"Couldn't load {id}")
1✔
1453
                continue
1✔
1454

1455
            target_author_key = target_proto.actor_key(orig_obj)
1✔
1456
            if not from_user.is_enabled(target_proto):
1✔
1457
                # if author isn't bridged and inReplyTo author is, DM a prompt
1458
                if id in in_reply_tos and target_author_key:
1✔
1459
                    if target_author := target_author_key.get():
1✔
1460
                        if target_author.is_enabled(from_cls):
1✔
1461
                            dms.maybe_send(
1✔
1462
                                from_proto=target_proto, to_user=from_user,
1463
                                type='replied_to_bridged_user', text=f"""\
1464
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently replied</a> to {orig_obj.actor_link(image=False)}, who's bridged here from {target_proto.PHRASE}. If you want them to see your replies, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1465

1466
                continue
1✔
1467

1468
            # deliver self-replies to followers
1469
            # https://github.com/snarfed/bridgy-fed/issues/639
1470
            if id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1471
                is_self_reply = True
1✔
1472
                logger.info(f'self reply!')
1✔
1473

1474
            # also add copies' targets
1475
            for copy in orig_obj.copies:
1✔
1476
                proto = PROTOCOLS[copy.protocol]
1✔
1477
                if proto in to_protocols:
1✔
1478
                    # copies generally won't have their own Objects
1479
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1480
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {id}')
1✔
1481
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1482

1483
            if target_proto == from_cls:
1✔
1484
                logger.debug(f'Skipping same-protocol target {id}')
1✔
1485
                continue
1✔
1486

1487
            target = target_proto.target_for(orig_obj)
1✔
1488
            if not target:
1✔
1489
                # TODO: surface errors like this somehow?
UNCOV
1490
                logger.error(f"Can't find delivery target for {id}")
×
UNCOV
1491
                continue
×
1492

1493
            logger.debug(f'Target for {id} is {target}')
1✔
1494
            # only use orig_obj for inReplyTos, like/repost objects, etc
1495
            # https://github.com/snarfed/bridgy-fed/issues/1237
1496
            targets[Target(protocol=target_proto.LABEL, uri=target)] = (
1✔
1497
                orig_obj if id in in_reply_tos or id in as1.get_ids(obj.as1, 'object')
1498
                else None)
1499

1500
            if target_author_key:
1✔
1501
                logger.debug(f'Recipient is {target_author_key}')
1✔
1502
                if write_obj.add('notify', target_author_key):
1✔
1503
                    write_obj.dirty = True
1✔
1504

1505
        if obj.type == 'undo':
1✔
1506
            logger.debug('Object is an undo; adding targets for inner object')
1✔
1507
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
1508
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
1509
            else:
1510
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
1511
            if inner_obj:
1✔
1512
                targets.update(from_cls.targets(inner_obj, from_user=from_user,
1✔
1513
                                                internal=True))
1514

1515
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
1516

1517
        # deliver to followers, if appropriate
1518
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
1519
        if not user_key:
1✔
1520
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
1521
            return targets
1✔
1522

1523
        followers = []
1✔
1524
        if (obj.type in ('post', 'update', 'delete', 'share', 'undo')
1✔
1525
                and (not is_reply or is_self_reply)):
1526
            logger.info(f'Delivering to followers of {user_key}')
1✔
1527
            followers = [
1✔
1528
                f for f in Follower.query(Follower.to == user_key,
1529
                                          Follower.status == 'active')
1530
                # skip protocol bot users
1531
                if not Protocol.for_bridgy_subdomain(f.from_.id())
1532
                # skip protocols this user hasn't enabled, or where the base
1533
                # object of this activity hasn't been bridged
1534
                and PROTOCOLS_BY_KIND[f.from_.kind()] in to_protocols]
1535
            user_keys = [f.from_ for f in followers]
1✔
1536
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
1537
            User.load_multi(users)
1✔
1538

1539
            if (not followers and
1✔
1540
                (util.domain_or_parent_in(
1541
                    util.domain_from_link(from_user.key.id()), LIMITED_DOMAINS)
1542
                 or util.domain_or_parent_in(
1543
                     util.domain_from_link(obj.key.id()), LIMITED_DOMAINS))):
1544
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
1545
                return {}
1✔
1546

1547
            # add to followers' feeds, if any
1548
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
1549
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
1550
                    write_obj.feed = [u.key for u in users]
1✔
1551
                    if write_obj.feed:
1✔
1552
                        write_obj.dirty = True
1✔
1553

1554
            # collect targets for followers
1555
            for user in users:
1✔
1556
                # TODO: should we pass remote=False through here to Protocol.load?
1557
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
1558
                if not target:
1✔
1559
                    # TODO: surface errors like this somehow?
1560
                    logger.error(f'Follower {user.key} has no delivery target')
1✔
1561
                    continue
1✔
1562

1563
                # normalize URL (lower case hostname, etc)
1564
                # ...but preserve our PDS URL without trailing slash in path
1565
                # https://atproto.com/specs/did#did-documents
1566
                target = util.dedupe_urls([target], trailing_slash=False)[0]
1✔
1567

1568
                targets[Target(protocol=user.LABEL, uri=target)] = \
1✔
1569
                    Object.get_by_id(inner_obj_id) if obj.type == 'share' else None
1570

1571
        # deliver to enabled HAS_COPIES protocols proactively
1572
        # TODO: abstract for other protocols
1573
        from atproto import ATProto
1✔
1574
        if (ATProto in to_protocols
1✔
1575
                and obj.type in ('post', 'update', 'delete', 'share')):
1576
            logger.info(f'user has ATProto enabled, adding {ATProto.PDS_URL}')
1✔
1577
            targets.setdefault(
1✔
1578
                Target(protocol=ATProto.LABEL, uri=ATProto.PDS_URL), None)
1579

1580
        # de-dupe targets, discard same-domain
1581
        # maps string target URL to (Target, Object) tuple
1582
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
1583
        # maps Target to Object or None
1584
        targets = {}
1✔
1585
        source_domains = [
1✔
1586
            util.domain_from_link(url) for url in
1587
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
1588
            if util.is_web(url)
1589
        ]
1590
        for url in sorted(util.dedupe_urls(
1✔
1591
                candidates.keys(),
1592
                # preserve our PDS URL without trailing slash in path
1593
                # https://atproto.com/specs/did#did-documents
1594
                trailing_slash=False)):
1595
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
UNCOV
1596
                logger.info(f'Skipping same-domain target {url}')
×
UNCOV
1597
                continue
×
1598
            target, obj = candidates[url]
1✔
1599
            targets[target] = obj
1✔
1600

1601
        return targets
1✔
1602

1603
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, **kwargs):
        """Returns the :class:`models.Object` for ``id``, stored or fetched.

        Looks in the datastore first (when ``local`` is set), then falls back
        to :meth:`fetch` depending on ``remote``. After a successful fetch, the
        object is stored back to the datastore.

        Sets the :attr:`new` and :attr:`changed` attributes when they can be
        determined for the loaded object, ie when ``local`` is True and
        ``remote`` is True or None.

        Args:
          id (str)
          remote (bool): whether to fetch the object over the network.

            * True: always fetch, even if we already have the object stored,
              and update our stored copy.
            * False: never fetch; if the object isn't stored, returns None.
            * None (default): fetch only if the object isn't stored, or if its
              stored copy was last updated more than ``OBJECT_REFRESH_AGE`` ago.
          local (bool): whether to look in the datastore before fetching over
            the network. If False, the object is still stored back to the
            datastore after a successful remote fetch.
          raise_ (bool): if False, catches any
            :class:`requests.RequestException` or :class:`HTTPException` raised
            by :meth:`fetch()` and returns ``None`` instead
          kwargs: passed through to :meth:`fetch()`

        Returns:
          models.Object: loaded object, or None if it isn't fetchable, eg a
          non-URL string for Web, or ``remote`` is False and it isn't in the
          datastore, or the fetched object is over ``models.MAX_ENTITY_SIZE``

        Raises:
          requests.HTTPError: anything that :meth:`fetch` raises, if ``raise_``
            is True
        """
        assert id
        # local=False with remote=False would leave nowhere to load from
        assert local or remote is not False

        stored = Object.get_by_id(id) if local else None
        if stored and (stored.as1 or stored.raw or stored.deleted):
            # we have real stored data for this object, so it isn't new
            stored.new = False

        if remote is False:
            return stored

        if remote is None and stored:
            refresh_cutoff = util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if stored.updated >= refresh_cutoff:
                # stored copy is recent enough, no need to re-fetch
                return stored

        # from here on we're fetching over the network
        previous_as1 = None
        if stored:
            obj = stored
            # remember the old contents so we can detect changes after fetching
            previous_as1 = obj.as1
            obj.our_as1 = None
            obj.new = False
        else:
            obj = Object(id=id)
            if local:
                # we checked the datastore and it wasn't there
                obj.new = True
                obj.changed = False

        try:
            fetched = cls.fetch(obj, **kwargs)
        except (RequestException, HTTPException) as err:
            if not raise_:
                util.interpret_http_exception(err)
                return None
            raise

        if not fetched:
            return None

        # measure the serialized entity so we don't try to store one that's
        # too big. https://stackoverflow.com/a/3042250/186123
        serialized = _entity_to_protobuf(obj)._pb.SerializeToString()
        size = len(serialized)
        if size > models.MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
            return None

        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(previous_as1)

        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                # it was previously attributed to a different protocol
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
1✔
1698

1699
    @classmethod
    def check_supported(cls, obj):
        """Rejects objects that this protocol can't handle, with HTTP 204.

        Calls :func:`error` with ``status=204`` in three cases:

        * the object's type, or for CRUD activities its inner object's type,
          isn't in :attr:`SUPPORTED_AS1_TYPES`
        * the object is a post type with blank content and no image or
          attachment stream
        * the object is a DM and either this protocol doesn't support DMs or
          neither its recipient nor its owner is a protocol bot account

        Objects with no type are let through unchecked.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)

        Raises:
          werkzeug.HTTPException: if this protocol doesn't support this object
        """
        if not obj.type:
            return

        supported = cls.SUPPORTED_AS1_TYPES
        inner_type = as1.object_type(as1.get_object(obj.as1)) or ''
        unsupported_inner = (obj.type in as1.CRUD_VERBS
                             and inner_type
                             and inner_type not in supported)
        if obj.type not in supported or unsupported_inner:
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # don't allow posts with blank content and no image/video/audio
        if obj.type in ('post', 'update'):
            crud_obj = as1.get_object(obj.as1)
        else:
            crud_obj = obj.as1

        if crud_obj.get('objectType') in as1.POST_TYPES:
            has_media = (
                util.get_url(crud_obj, key='image')
                or any(util.get_urls(crud_obj, 'attachments', inner_key='stream')))
            # TODO: handle articles with displayName but not content
            if not has_media and not source.html_to_text(crud_obj.get('content')).strip():
                error('Blank content and no image or video or audio', status=204)

        # DMs are only allowed to/from protocol bot accounts
        recip = as1.recipient_if_dm(obj.as1)
        if not recip:
            return

        protocol_user_ids = PROTOCOL_DOMAINS + common.protocol_user_copy_ids()
        if not (cls.SUPPORTS_DMS
                and (recip in protocol_user_ids
                     or as1.get_owner(obj.as1) in protocol_user_ids)):
            error(f"Bridgy Fed doesn't support DMs", status=204)
1✔
1742

1743

1744
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Builds the object from the request and hands it to the
    :meth:`Protocol.receive` of the object's source protocol.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`. If it's our primary
        domain or one of the protocol domains, the activity is treated as
        internal.
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    A :class:`requests.RequestException` or :class:`ValueError` raised by
    :meth:`Protocol.receive` is reported via :func:`error` with status 304.

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    params = request.form.to_dict()

    authed_as = params.pop('authed_as', None)
    internal = (authed_as == common.PRIMARY_DOMAIN
                or authed_as in common.PROTOCOL_DOMAINS)

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    received_at = params.pop('received_at', None)
    if received_at:
        # parsed outside the try below, so a malformed timestamp propagates
        received_at = datetime.fromisoformat(received_at)

    source_proto = PROTOCOLS[obj.source_protocol]
    try:
        return source_proto.receive(obj=obj, authed_as=authed_as,
                                    internal=internal, received_at=received_at)
    except (RequestException, ValueError) as err:
        if isinstance(err, RequestException):
            util.interpret_http_exception(err)
        else:
            logger.warning(err, exc_info=True)
        error(err, status=304)
×
1788

1789

1790
@cloud_tasks_only(log=None)
def send_task():
    """Task handler that delivers one activity to one destination.

    Builds the object from the request, checks that the destination protocol
    supports it, and calls that protocol's :meth:`Protocol.send`.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns:
      (str, int) tuple: empty body and HTTP status code. 200 if the send
      returned a truthy value, 204 if it returned False or if ``protocol`` or
      ``url`` is missing, otherwise 304.
    """
    common.log_request()

    # prepare
    params = request.form.to_dict()
    url = params.get('url')
    protocol = params.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    # NOTE(review): target isn't referenced again below; constructing it
    # presumably validates uri and protocol. Confirm before removing.
    target = Target(uri=url, protocol=protocol)
    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    dest_proto = PROTOCOLS[protocol]
    dest_proto.check_supported(obj)
    allow_opt_out = (obj.type == 'delete')

    sender = None
    user_key = params.get('user')
    if user_key:
        key = ndb.Key(urlsafe=user_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        sender = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send
    delay = ''
    first_attempt = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if first_attempt and obj.created:
        elapsed = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(elapsed.total_seconds())
        delay = f'({delay_s} s behind)'

    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = dest_proto.send(obj, url, from_user=sender,
                               orig_obj_id=params.get('orig_obj_id'))
    except BaseException as err:
        code, body = util.interpret_http_exception(err)
        if not (code or body):
            # not an HTTP error we know how to interpret, so let it propagate
            raise

    if sent is False:
        logger.info(f'Failed sending!')

    if sent:
        status = 200
    elif sent is False:
        status = 204
    else:
        status = 304
    return '', status
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc