• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 056fc2b3-479c-4a47-90b2-7193e3f71196

01 Feb 2025 03:12AM UTC coverage: 93.124% (-0.2%) from 93.282%
056fc2b3-479c-4a47-90b2-7193e3f71196

push

circleci

snarfed
Protocol.check_supported: block blank posts

fixes #1737

4 of 4 new or added lines in 1 file covered. (100.0%)

8 existing lines in 1 file now uncovered.

4605 of 4945 relevant lines covered (93.12%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.01
/protocol.py
1
"""Base protocol class and common code."""
2
import copy
1✔
3
from datetime import datetime, timedelta, timezone
1✔
4
import logging
1✔
5
import os
1✔
6
import re
1✔
7
from threading import Lock
1✔
8
from urllib.parse import urljoin, urlparse
1✔
9

10
from cachetools import cached, LRUCache
1✔
11
from flask import request
1✔
12
from google.cloud import ndb
1✔
13
from google.cloud.ndb import OR
1✔
14
from google.cloud.ndb.model import _entity_to_protobuf
1✔
15
from granary import as1, as2, source
1✔
16
from granary.source import html_to_text
1✔
17
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
18
from oauth_dropins.webutil.flask_util import cloud_tasks_only
1✔
19
from oauth_dropins.webutil import models
1✔
20
from oauth_dropins.webutil import util
1✔
21
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
22
from requests import RequestException
1✔
23
import werkzeug.exceptions
1✔
24
from werkzeug.exceptions import BadGateway, HTTPException
1✔
25

26
import common
1✔
27
from common import (
1✔
28
    DOMAIN_BLOCKLIST,
29
    DOMAIN_RE,
30
    DOMAINS,
31
    PRIMARY_DOMAIN,
32
    PROTOCOL_DOMAINS,
33
    report_error,
34
    subdomain_wrap,
35
)
36
import dms
1✔
37
import ids
1✔
38
from ids import (
1✔
39
    BOT_ACTOR_AP_IDS,
40
    normalize_user_id,
41
    translate_object_id,
42
    translate_user_id,
43
)
44
import memcache
1✔
45
from models import (
1✔
46
    DM,
47
    Follower,
48
    Object,
49
    PROTOCOLS,
50
    PROTOCOLS_BY_KIND,
51
    Target,
52
    User,
53
)
54

55
OBJECT_REFRESH_AGE = timedelta(days=30)
DELETE_TASK_DELAY = timedelta(minutes=2)
CREATE_MAX_AGE = timedelta(weeks=2)

# Users on these domains must be followed before we deliver anything from them
# other than their profile. A non-empty, whitespace-separated LIMITED_DOMAINS
# environment variable wins; otherwise the limited_domains file is loaded.
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
                   or util.load_file_lines('limited_domains'))

# AS1 verbs that are subtracted out when computing STORE_AS1_TYPES below.
DONT_STORE_AS1_TYPES = as1.CRUD_VERBS | {
    'accept',
    'reject',
    'stop-following',
    'undo',
}
# NOTE: ``-`` binds tighter than ``|``, so only VERBS_WITH_OBJECT is filtered
# by DONT_STORE_AS1_TYPES. The parentheses just make that grouping explicit.
STORE_AS1_TYPES = (as1.ACTOR_TYPES
                   | as1.POST_TYPES
                   | (as1.VERBS_WITH_OBJECT - DONT_STORE_AS1_TYPES))

logger = logging.getLogger(__name__)
74

75

76
def error(*args, status=299, **kwargs):
    """Calls :func:`common.error` with the HTTP status code defaulted to 299.

    The 299 default is there to prevent the task from being retried.
    """
    kwargs['status'] = status
    return common.error(*args, **kwargs)
79

80

81
class ErrorButDoNotRetryTask(HTTPException):
    """HTTP exception that carries the non-standard status code 299."""
    description = 'ErrorButDoNotRetryTask'
    code = 299
84

85
# Register our 299 exception class with werkzeug, unless something else has
# already claimed that status code. Background:
# https://github.com/pallets/flask/issues/1837#issuecomment-304996942
werkzeug.exceptions.default_exceptions.setdefault(
    ErrorButDoNotRetryTask.code, ErrorButDoNotRetryTask)
werkzeug.exceptions._aborter.mapping.setdefault(
    ErrorButDoNotRetryTask.code, ErrorButDoNotRetryTask)
88

89

90
def activity_id_memcache_key(id):
    """Returns the memcache key for ``receive-{id}``.

    Args:
      id: activity id, interpolated into the key as is

    Returns:
      whatever :func:`memcache.key` returns for ``receive-{id}``
    """
    raw_key = f'receive-{id}'
    return memcache.key(raw_key)
92

93

94
class Protocol:
    """Base protocol class. Not to be instantiated; classmethods only.

    The class-level constants below are defaults, each documented by the
    attribute docstring that follows it.
    """
    ABBREV = None
    """str: lower case abbreviation, used in URL paths"""
    PHRASE = None
    """str: human-readable name or phrase. Used in phrases like ``Follow this person on {PHRASE}``"""
    OTHER_LABELS = ()
    """sequence of str: label aliases"""
    LOGO_HTML = ''
    """str: logo emoji or ``<img>`` tag"""
    CONTENT_TYPE = None
    """str: MIME type of this protocol's native data format, appropriate for the ``Content-Type`` HTTP header."""
    HAS_COPIES = False
    """bool: whether this protocol is push and needs us to proactively create "copy" users and objects, as opposed to pulling converted objects on demand"""
    REQUIRES_AVATAR = False
    """bool: whether accounts on this protocol are required to have a profile picture. If they don't, their ``User.status`` will be ``blocked``."""
    REQUIRES_NAME = False
    """bool: whether accounts on this protocol are required to have a profile name that's different than their handle or id. If they don't, their ``User.status`` will be ``blocked``."""
    REQUIRES_OLD_ACCOUNT = False
    """bool: whether accounts on this protocol are required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their profile includes creation date and it's not old enough, their ``User.status`` will be ``blocked``."""
    DEFAULT_ENABLED_PROTOCOLS = ()
    """sequence of str: labels of other protocols that are automatically enabled for this protocol to bridge into"""
    DEFAULT_SERVE_USER_PAGES = False
    """bool: whether to serve user pages for all of this protocol's users on fed.brid.gy. If ``False``, user pages will only be served for users who have explicitly opted in."""
    SUPPORTED_AS1_TYPES = ()
    """sequence of str: AS1 objectTypes and verbs that this protocol supports receiving and sending"""
    SUPPORTS_DMS = False
    """bool: whether this protocol can receive DMs (chat messages)"""
122

123
    def __init__(self):
1✔
124
        assert False
×
125

126
    @classmethod
1✔
127
    @property
1✔
128
    def LABEL(cls):
1✔
129
        """str: human-readable lower case name of this protocol, eg ``'activitypub``"""
130
        return cls.__name__.lower()
1✔
131

132
    @staticmethod
    def for_request(fed=None):
        """Returns the protocol for the current request, based on its hostname.

        Thin wrapper around :meth:`for_bridgy_subdomain` that passes in the
        current Flask request's host.

        Args:
          fed (str or protocol.Protocol): protocol to return if the current
            request is on ``fed.brid.gy``

        Returns:
          Protocol: protocol, or None if the request hostname is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        host = request.host
        return Protocol.for_bridgy_subdomain(host, fed=fed)
147

148
    @staticmethod
    def for_bridgy_subdomain(domain_or_url, fed=None):
        """Returns the protocol for a brid.gy subdomain.

        Args:
          domain_or_url (str): bare domain, or URL whose domain is used
          fed (str or protocol.Protocol): protocol, or protocol label, to
            return if the domain is the primary domain or a local domain

        Returns:
          class: :class:`Protocol` subclass, or None if the domain is not a
          subdomain of ``brid.gy`` or isn't a known protocol
        """
        if util.is_web(domain_or_url):
            domain = util.domain_from_link(domain_or_url, minimize=False)
        else:
            domain = domain_or_url

        if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
            # string labels are looked up; anything else is returned unchanged
            if isinstance(fed, str):
                return PROTOCOLS[fed]
            return fed

        if domain and domain.endswith(common.SUPERDOMAIN):
            subdomain_label = domain.removesuffix(common.SUPERDOMAIN)
            return PROTOCOLS.get(subdomain_label)

        return None
171

172
    @classmethod
1✔
173
    def owns_id(cls, id):
1✔
174
        """Returns whether this protocol owns the id, or None if it's unclear.
175

176
        To be implemented by subclasses.
177

178
        IDs are string identities that uniquely identify users, and are intended
179
        primarily to be machine readable and usable. Compare to handles, which
180
        are human-chosen, human-meaningful, and often but not always unique.
181

182
        Some protocols' ids are more or less deterministic based on the id
183
        format, eg AT Protocol owns ``at://`` URIs. Others, like http(s) URLs,
184
        could be owned by eg Web or ActivityPub.
185

186
        This should be a quick guess without expensive side effects, eg no
187
        external HTTP fetches to fetch the id itself or otherwise perform
188
        discovery.
189

190
        Returns False if the id's domain is in :const:`common.DOMAIN_BLOCKLIST`.
191

192
        Args:
193
          id (str)
194

195
        Returns:
196
          bool or None:
197
        """
198
        return False
1✔
199

200
    @classmethod
1✔
201
    def owns_handle(cls, handle, allow_internal=False):
1✔
202
        """Returns whether this protocol owns the handle, or None if it's unclear.
203

204
        To be implemented by subclasses.
205

206
        Handles are string identities that are human-chosen, human-meaningful,
207
        and often but not always unique. Compare to IDs, which uniquely identify
208
        users, and are intended primarily to be machine readable and usable.
209

210
        Some protocols' handles are more or less deterministic based on the id
211
        format, eg ActivityPub (technically WebFinger) handles are
212
        ``@user@instance.com``. Others, like domains, could be owned by eg Web,
213
        ActivityPub, AT Protocol, or others.
214

215
        This should be a quick guess without expensive side effects, eg no
216
        external HTTP fetches to fetch the id itself or otherwise perform
217
        discovery.
218

219
        Args:
220
          handle (str)
221
          allow_internal (bool): whether to return False for internal domains
222
            like ``fed.brid.gy``, ``bsky.brid.gy``, etc
223

224
        Returns:
225
          bool or None
226
        """
227
        return False
1✔
228

229
    @classmethod
1✔
230
    def handle_to_id(cls, handle):
1✔
231
        """Converts a handle to an id.
232

233
        To be implemented by subclasses.
234

235
        May incur network requests, eg DNS queries or HTTP requests. Avoids
236
        blocked or opted out users.
237

238
        Args:
239
          handle (str)
240

241
        Returns:
242
          str: corresponding id, or None if the handle can't be found
243
        """
244
        raise NotImplementedError()
×
245

246
    @classmethod
    def key_for(cls, id, allow_opt_out=False):
        """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

        To be implemented by subclasses. Canonicalizes the id if necessary.

        When called on :class:`Protocol` itself, infers the protocol with
        :meth:`for_id` and delegates to it. When called on a concrete
        subclass, uses that subclass as is.

        Args:
          id (str):
          allow_opt_out (bool): whether to allow users who are currently opted out

        Returns:
          google.cloud.ndb.Key: matching key, or None if the given id is not a
          valid :class:`User` id for this protocol.
        """
        if cls == Protocol:
            inferred = Protocol.for_id(id)
            if not inferred:
                return None
            return inferred.key_for(id, allow_opt_out=allow_opt_out)

        # load the stored user, even if opted out, so that we follow use_instead
        user = cls.get_by_id(id, allow_opt_out=True)
        if not user:
            # nothing stored; derive the key from a fresh, unsaved entity
            return cls(id=id).key

        if user.status and not allow_opt_out:
            return None
        return user.key
276

277
    @staticmethod
    def _for_id_memcache_key(id, remote=None):
        """Generates the memcache key for :meth:`for_id`: the id's domain.

        A key is only produced when ``remote`` is truthy and ``id`` passes
        ``util.is_web``.

        Args:
          id (str)
          remote (bool): the ``remote`` argument that :meth:`for_id` was
            called with

        Returns:
          str: ``id``'s domain, or None if ``remote`` is falsy or ``id`` isn't
          a web URL
        """
        if remote and util.is_web(id):
            return util.domain_from_link(id)

        # NOTE(review): presumably a None key tells memcache.memoize to skip
        # memcache for this call - confirm against memcache.memoize.
        return None
289

290
    @cached(LRUCache(20000), lock=Lock())
    @memcache.memoize(key=_for_id_memcache_key, write=lambda id, remote: remote,
                      version=3)
    @staticmethod
    def for_id(id, remote=True):
        """Returns the protocol for a given id.

        Tries, in order: our own per-protocol subdomains; each protocol's
        :meth:`owns_id`; the ``source_protocol`` of an already stored
        :class:`Object`; and finally, only if ``remote`` is true, loading the
        id over the network with each protocol that was unsure.

        Args:
          id (str)
          remote (bool): whether to perform expensive side effects like fetching
            the id itself over the network, or other discovery.

        Returns:
          Protocol subclass: matching protocol, or None if no single known
          protocol definitively owns this id
        """
        logger.debug(f'Determining protocol for id {id}')
        if not id:
            return None

        # remove our synthetic id fragment, if any
        #
        # will this eventually cause false positives for other services that
        # include our full ids inside their own ids, non-URL-encoded? guess
        # we'll figure that out if/when it happens.
        id = id.partition('#bridgy-fed-')[0]

        if util.is_web(id):
            # step 1: check for our per-protocol subdomains
            try:
                is_homepage = urlparse(id).path.strip('/') == ''
            except ValueError as e:
                logger.info(f'urlparse ValueError: {e}')
                return None

            by_subdomain = Protocol.for_bridgy_subdomain(id)
            if by_subdomain and not is_homepage and id not in BOT_ACTOR_AP_IDS:
                logger.debug(f'  {by_subdomain.LABEL} owns id {id}')
                return by_subdomain

        # step 2: check if any Protocols say conclusively that they own it
        # sort to be deterministic
        protocols = sorted({p for p in PROTOCOLS.values() if p},
                           key=lambda p: p.LABEL)
        candidates = []
        for protocol in protocols:
            owns = protocol.owns_id(id)
            if owns:
                logger.debug(f'  {protocol.LABEL} owns id {id}')
                return protocol
            elif owns is not False:
                # None etc means "unsure", so keep it as a candidate
                candidates.append(protocol)

        if len(candidates) == 1:
            logger.debug(f'  {candidates[0].LABEL} owns id {id}')
            return candidates[0]

        # step 3: look for existing Objects in the datastore
        obj = Protocol.load(id, remote=False)
        if obj and obj.source_protocol:
            logger.debug(f'  {obj.key.id()} owned by source_protocol {obj.source_protocol}')
            return PROTOCOLS[obj.source_protocol]

        # step 4: fetch over the network, if necessary
        if not remote:
            return None

        for protocol in candidates:
            logger.debug(f'Trying {protocol.LABEL}')
            try:
                obj = protocol.load(id, local=False, remote=True)

                if protocol.ABBREV == 'web':
                    # for web, if we fetch and get HTML without microformats,
                    # load returns False but the object will be stored in the
                    # datastore with source_protocol web, and in cache. load it
                    # again manually to check for that.
                    obj = Object.get_by_id(id)
                    if obj and obj.source_protocol != 'web':
                        obj = None

                if obj:
                    logger.debug(f'  {protocol.LABEL} owns id {id}')
                    return protocol
            except BadGateway:
                # we tried and failed fetching the id over the network.
                # this depends on ActivityPub.fetch raising this!
                return None
            except HTTPException as e:
                # internal error we generated ourselves; try next protocol.
                # logged so that the skip is visible when debugging.
                logger.debug(f'  {protocol.LABEL} load raised {e!r}, trying next protocol')
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code:
                    # we tried and failed fetching the id over the network
                    return None
                raise

        logger.info(f'No matching protocol found for {id} !')
        return None
390

391
    @cached(LRUCache(20000), lock=Lock())
    @staticmethod
    def for_handle(handle):
        """Returns the protocol for a given handle.

        May incur expensive side effects like resolving the handle itself over
        the network or other discovery.

        Args:
          handle (str)

        Returns:
          (Protocol subclass, str) tuple: matching protocol and optional id (if
          resolved), or ``(None, None)`` if no known protocol owns this handle
        """
        # TODO: normalize, eg convert domains to lower case
        logger.debug(f'Determining protocol for handle {handle}')
        if not handle:
            return (None, None)

        # step 1: ask every protocol whether it conclusively owns the handle,
        # in sorted order so that the result is deterministic.
        known = {p for p in PROTOCOLS.values() if p}
        unsure = []
        for proto in sorted(known, key=lambda p: p.LABEL):
            verdict = proto.owns_handle(handle)
            if verdict:
                logger.debug(f'  {proto.LABEL} owns handle {handle}')
                return (proto, None)
            if verdict is not False:
                # neither yes nor a definite no
                unsure.append(proto)

        if len(unsure) == 1:
            sole = unsure[0]
            logger.debug(f'  {sole.LABEL} owns handle {handle}')
            return (sole, None)

        # step 2: look for a stored User with this handle
        for proto in unsure:
            user = proto.query(proto.handle == handle).get()
            if not user:
                continue
            if user.status:
                # user has a status set, eg blocked or opted out
                return (None, None)
            logger.debug(f'  user {user.key} handle {handle}')
            return (proto, user.key.id())

        # step 3: have each remaining protocol try to resolve the handle
        for proto in unsure:
            resolved = proto.handle_to_id(handle)
            if resolved:
                logger.debug(f'  {proto.LABEL} resolved handle {handle} to id {resolved}')
                return (proto, resolved)

        logger.info(f'No matching protocol found for handle {handle} !')
        return (None, None)
446

447
    @classmethod
1✔
448
    def bridged_web_url_for(cls, user, fallback=False):
1✔
449
        """Returns the web URL for a user's bridged profile in this protocol.
450

451
        For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
452
        returns ``https://bsky.app/profile/alice.com.web.brid.gy``
453

454
        Args:
455
          user (models.User)
456
          fallback (bool): if True, and bridged users have no canonical user
457
            profile URL in this protocol, return the native protocol's profile URL
458

459
        Returns:
460
          str, or None if there isn't a canonical URL
461
        """
462
        if fallback:
1✔
463
            return user.web_url()
1✔
464

465
    @classmethod
    def actor_key(cls, obj, allow_opt_out=False):
        """Returns the :class:`User` key for a given object's author or actor.

        The owner id comes from ``as1.get_owner`` and is converted to a key
        with :meth:`key_for`.

        Args:
          obj (models.Object)
          allow_opt_out (bool): whether to return a user key if they're opted out

        Returns:
          google.cloud.ndb.key.Key or None:
        """
        owner_id = as1.get_owner(obj.as1)
        if not owner_id:
            return None
        return cls.key_for(owner_id, allow_opt_out=allow_opt_out)
479

480
    @classmethod
    def bot_user_id(cls):
        """Returns the Web user id for the bot user for this protocol.

        Built from this protocol's ``ABBREV`` followed by
        :const:`common.SUPERDOMAIN`. For example, ``'bsky.brid.gy'`` for ATProto.

        Returns:
          str:
        """
        abbrev = cls.ABBREV
        return f'{abbrev}{common.SUPERDOMAIN}'
490

491
    @classmethod
1✔
492
    def create_for(cls, user):
1✔
493
        """Creates or re-activate a copy user in this protocol.
494

495
        Should add the copy user to :attr:`copies`.
496

497
        If the copy user already exists and active, should do nothing.
498

499
        Args:
500
          user (models.User): original source user. Shouldn't already have a
501
            copy user for this protocol in :attr:`copies`.
502

503
        Raises:
504
          ValueError: if we can't create a copy of the given user in this protocol
505
        """
506
        raise NotImplementedError()
×
507

508
    @classmethod
1✔
509
    def send(to_cls, obj, url, from_user=None, orig_obj_id=None):
1✔
510
        """Sends an outgoing activity.
511

512
        To be implemented by subclasses.
513

514
        NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates
515
        a copy and sends it, it *must* add that copy to the *object*'s (not
516
        activity's) :attr:`copies`!
517

518
        Args:
519
          obj (models.Object): with activity to send
520
          url (str): destination URL to send to
521
          from_user (models.User): user (actor) this activity is from
522
          orig_obj_id (str): :class:`models.Object` key id of the "original object"
523
            that this object refers to, eg replies to or reposts or likes
524

525
        Returns:
526
          bool: True if the activity is sent successfully, False if it is
527
          ignored or otherwise unsent due to protocol logic, eg no webmention
528
          endpoint, protocol doesn't support the activity type. (Failures are
529
          raised as exceptions.)
530

531
        Raises:
532
          werkzeug.HTTPException if the request fails
533
        """
534
        raise NotImplementedError()
×
535

536
    @classmethod
1✔
537
    def fetch(cls, obj, **kwargs):
1✔
538
        """Fetches a protocol-specific object and populates it in an :class:`Object`.
539

540
        Errors are raised as exceptions. If this method returns False, the fetch
541
        didn't fail but didn't succeed either, eg the id isn't valid for this
542
        protocol, or the fetch didn't return valid data for this protocol.
543

544
        To be implemented by subclasses.
545

546
        Args:
547
          obj (models.Object): with the id to fetch. Data is filled into one of
548
            the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
549
          kwargs: subclass-specific
550

551
        Returns:
552
          bool: True if the object was fetched and populated successfully,
553
          False otherwise
554

555
        Raises:
556
          requests.RequestException or werkzeug.HTTPException: if the fetch fails
557
        """
558
        raise NotImplementedError()
×
559

560
    @classmethod
1✔
561
    def convert(cls, obj, from_user=None, **kwargs):
1✔
562
        """Converts an :class:`Object` to this protocol's data format.
563

564
        For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
565
        and ``application/activity+json`` for :class:`ActivityPub`.
566

567
        Just passes through to :meth:`_convert`, then does minor
568
        protocol-independent postprocessing.
569

570
        Args:
571
          obj (models.Object):
572
          from_user (models.User): user (actor) this activity/object is from
573
          kwargs: protocol-specific, passed through to :meth:`_convert`
574

575
        Returns:
576
          converted object in the protocol's native format, often a dict
577
        """
578
        if not obj or not obj.as1:
1✔
579
            return {}
1✔
580

581
        id = obj.key.id() if obj.key else obj.as1.get('id')
1✔
582
        is_activity = obj.as1.get('verb') in ('post', 'update')
1✔
583
        base_obj = as1.get_object(obj.as1) if is_activity else obj.as1
1✔
584
        orig_our_as1 = obj.our_as1
1✔
585

586
        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
587
        if (from_user and base_obj
1✔
588
            and base_obj.get('objectType') in as1.ACTOR_TYPES
589
            and PROTOCOLS.get(obj.source_protocol) != cls
590
            and Protocol.for_bridgy_subdomain(id) not in DOMAINS
591
            # Web users are special cased, they don't get the label if they've
592
            # explicitly enabled Bridgy Fed with redirects or webmentions
593
            and not (from_user.LABEL == 'web'
594
                     and (from_user.last_webmention_in or from_user.has_redirects))):
595

596
            obj.our_as1 = copy.deepcopy(obj.as1)
1✔
597
            actor = as1.get_object(obj.as1) if is_activity else obj.as1
1✔
598
            actor['objectType'] = 'person'
1✔
599
            cls.add_source_links(actor=actor, obj=obj, from_user=from_user)
1✔
600

601
        converted = cls._convert(obj, from_user=from_user, **kwargs)
1✔
602
        obj.our_as1 = orig_our_as1
1✔
603
        return converted
1✔
604

605
    @classmethod
1✔
606
    def _convert(cls, obj, from_user=None, **kwargs):
1✔
607
        """Converts an :class:`Object` to this protocol's data format.
608

609
        To be implemented by subclasses. Implementations should generally call
610
        :meth:`Protocol.translate_ids` (as their own class) before converting to
611
        their format.
612

613
        Args:
614
          obj (models.Object):
615
          from_user (models.User): user (actor) this activity/object is from
616
          kwargs: protocol-specific
617

618
        Returns:
619
          converted object in the protocol's native format, often a dict. May
620
            return the ``{}`` empty dict if the object can't be converted.
621
        """
622
        raise NotImplementedError()
×
623

624
    @classmethod
    def add_source_links(cls, actor, obj, from_user):
        """Adds "bridged from ... by Bridgy Fed" HTML to ``actor['summary']``.

        Default implementation; subclasses may override. Modifies ``actor`` in
        place. Does nothing beyond defaulting ``summary`` to ``''`` if the
        summary's text already contains ``Bridgy Fed]``.

        Args:
          actor (dict): AS1 actor
          obj (models.Object):
          from_user (models.User): user (actor) this activity/object is from
        """
        assert from_user
        bio = actor.setdefault('summary', '')
        if 'Bridgy Fed]' in html_to_text(bio, ignore_links=True):
            # already labeled
            return

        actor_id = actor.get('id')
        on_proto = (PROTOCOLS[obj.source_protocol].PHRASE
                    if obj.source_protocol else '')
        if on_proto:
            on_proto = f' on {on_proto}'

        is_from_user = bool(from_user.key) and actor_id in (
            from_user.key.id(), from_user.profile_id())
        if is_from_user:
            # the actor is from_user themselves: link to their user page here
            # and to their own profile
            label = f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a> from <a href="{from_user.web_url()}">{from_user.handle}</a>{on_proto} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
        else:
            link_target = as1.get_url(actor) or actor_id
            origin = util.pretty_link(link_target) if link_target else '?'
            label = f'[bridged from {origin}{on_proto} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'

        # blank line between an existing bio and the label
        separator = '<br><br>' if bio else ''
        actor['summary'] = bio + separator + label
657

658
    @classmethod
1✔
659
    def set_username(to_cls, user, username):
1✔
660
        """Sets a custom username for a user's bridged account in this protocol.
661

662
        Args:
663
          user (models.User)
664
          username (str)
665

666
        Raises:
667
          ValueError: if the username is invalid
668
          RuntimeError: if the username could not be set
669
        """
670
        raise NotImplementedError()
1✔
671

672
    @classmethod
1✔
673
    def target_for(cls, obj, shared=False):
1✔
674
        """Returns an :class:`Object`'s delivery target (endpoint).
675

676
        To be implemented by subclasses.
677

678
        Examples:
679

680
        * If obj has ``source_protocol`` ``web``, returns its URL, as a
681
          webmention target.
682
        * If obj is an ``activitypub`` actor, returns its inbox.
683
        * If obj is an ``activitypub`` object, returns it's author's or actor's
684
          inbox.
685

686
        Args:
687
          obj (models.Object):
688
          shared (bool): optional. If True, returns a common/shared
689
            endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
690
            multiple recipients for efficiency
691

692
        Returns:
693
          str: target endpoint, or None if not available.
694
        """
695
        raise NotImplementedError()
×
696

697
    @classmethod
    def is_blocklisted(cls, url, allow_internal=False):
        """Returns True if we block the given URL and shouldn't deliver to it.

        Default implementation here, subclasses may override. Checks the
        URL's domain, and its parent domains, against
        :const:`common.DOMAIN_BLOCKLIST`.

        Args:
          url (str):
          allow_internal (bool): if False (the default), our own domains in
            :const:`common.DOMAINS`, eg ``fed.brid.gy``, ``bsky.brid.gy``, are
            treated as blocklisted too. If True, only
            :const:`common.DOMAIN_BLOCKLIST` is checked.
        """
        if allow_internal:
            blocklist = DOMAIN_BLOCKLIST
        else:
            # build a new sequence instead of using ``+=`` on an alias of the
            # module-level constant, which would extend DOMAIN_BLOCKLIST
            # itself in place on every call if it were ever a list
            blocklist = (*DOMAIN_BLOCKLIST, *DOMAINS)

        return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
712

713
    @classmethod
    def translate_ids(to_cls, obj):
        """Rewrites every id in an AS1 object so it points at ``to_cls``.

        The source protocol is inferred separately for each id value. Ids
        whose protocol can't be determined, or that already belong to
        ``to_cls``, are left alone.

        For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
        ``at://did:plc:abc/coll/123`` will be converted to
        ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

        Wraps these AS1 fields:

        * ``id``
        * ``actor``
        * ``author``
        * ``bcc``
        * ``bto``
        * ``cc``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``object.object``
        * ``attachments[].id``
        * ``tags[objectType=mention].url``
        * ``to``

        This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
        same logic is duplicated there!

        TODO: unify with :meth:`Object.resolve_ids`,
        :meth:`models.Object.normalize_ids`.

        Args:
          to_cls (Protocol subclass): protocol to translate ids into. Must not
            be :class:`Protocol` itself.
          obj (dict): AS1 object or activity (not :class:`models.Object`!).
            It is deep-copied, not modified.

        Returns:
          dict: wrapped AS1 version of ``obj``, or ``obj`` itself if it's
          empty
        """
        assert to_cls != Protocol
        if not obj:
            return obj

        # work on a copy so the caller's dict is never modified
        outer_obj = copy.deepcopy(obj)
        inner_objs = as1.get_objects(outer_obj)
        outer_obj['object'] = inner_objs

        def translate(elem, field, fn, uri=False):
            """Translates the id(s) in ``elem[field]`` in place using ``fn``."""
            values = as1.get_objects(elem, field)
            elem[field] = values

            for val in values:
                orig_id = val.get('id')
                if not orig_id:
                    continue
                # audience targets in recipient fields are passed through
                if field in ('to', 'cc', 'bcc', 'bto') and as1.is_audience(orig_id):
                    continue

                from_cls = Protocol.for_id(orig_id)
                # TODO: what if from_cls is None? relax translate_object_id,
                # make it a noop if we don't know enough about from/to?
                if from_cls and from_cls != to_cls:
                    val['id'] = fn(id=orig_id, from_=from_cls, to=to_cls)
                if uri and val['id']:
                    val['id'] = to_cls(id=val['id']).id_uri()

            # collapse id-only dicts back to bare strings, and single-element
            # lists back to that element
            collapsed = [v['id'] if v.keys() == {'id'} else v for v in values]
            elem[field] = collapsed[0] if len(collapsed) == 1 else collapsed

        outer_type = as1.object_type(outer_obj)
        if outer_type in as1.ACTOR_TYPES:
            translate(outer_obj, 'id', translate_user_id)
        else:
            translate(outer_obj, 'id', translate_object_id)

        for inner in inner_objs:
            inner_is_actor = (as1.object_type(inner) in as1.ACTOR_TYPES
                              or as1.get_owner(outer_obj) == inner.get('id')
                              or outer_type in ('follow', 'stop-following'))
            translate(inner, 'id',
                      translate_user_id if inner_is_actor else translate_object_id)

            nested_is_actor = inner.get('verb') in as1.VERBS_WITH_ACTOR_OBJECT
            translate(inner, 'object',
                      translate_user_id if nested_is_actor else translate_object_id)

        for elem in [outer_obj] + inner_objs:
            translate(elem, 'inReplyTo', translate_object_id)

            for user_field in ('actor', 'author', 'to', 'cc', 'bto', 'bcc'):
                translate(elem, user_field, translate_user_id)

            for tag in as1.get_objects(elem, 'tags'):
                if tag.get('objectType') == 'mention':
                    translate(tag, 'url', translate_user_id, uri=True)

            for att in as1.get_objects(elem, 'attachments'):
                translate(att, 'id', translate_object_id)
                # attachments with a url but no id get an id derived from
                # the url, when we can tell which protocol the url is from
                att_url = att.get('url')
                if att_url and not att.get('id'):
                    if url_cls := Protocol.for_id(att_url):
                        att['id'] = translate_object_id(
                            from_=url_cls, to=to_cls, id=att_url)

        outer_obj = util.trim_nulls(outer_obj)

        if remaining := util.get_list(outer_obj, 'object'):
            compacted = [o['id'] if o.keys() == {'id'} else o for o in remaining]
            outer_obj['object'] = (compacted[0] if len(compacted) == 1
                                   else compacted)

        return outer_obj
1✔
818

819
    @classmethod
    def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
        """Handles an incoming activity.

        Validates the activity (id ownership, blocklist, visibility,
        authorization, opt-out status, age), stores it, applies
        activity-specific side effects (follows, unfollows, deletes, blocks,
        DMs), and finally hands it to :meth:`deliver`.

        If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
        unset, the request is rejected via ``error()``.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity
          internal (bool): whether to allow activity ids on internal domains,
            from opted out/blocked users, etc.
          received_at (datetime): when we first saw (received) this activity.
            Right now only used for monitoring.

        Returns:
          (str, int) tuple: (response body, HTTP status code) Flask response

        Raises:
          werkzeug.HTTPException: if the request is invalid
        """
        # check some invariants
        assert from_cls != Protocol
        assert isinstance(obj, Object), obj

        if not obj.as1:
            error('No object data provided')

        id = None
        if obj.key and obj.key.id():
            id = obj.key.id()

        if not id:
            id = obj.as1.get('id')
            obj.key = ndb.Key(Object, id)

        if not id:
            error('No id provided')
        elif from_cls.owns_id(id) is False:
            error(f'Protocol {from_cls.LABEL} does not own id {id}')
        elif from_cls.is_blocklisted(id, allow_internal=internal):
            error(f'Activity {id} is blocklisted')
        # check that this activity is public. only do this for some activities,
        # not eg likes or follows, since Mastodon doesn't currently mark those
        # as explicitly public.
        elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
                  and not as1.is_public(obj.as1, unlisted=False)
                  and not as1.is_dm(obj.as1)):
            logger.info('Dropping non-public activity')
            return ('OK', 200)

        # lease this object, atomically
        memcache_key = activity_id_memcache_key(id)
        leased = memcache.memcache.add(memcache_key, 'leased', noreply=False,
                                     expire=5 * 60)  # 5 min
        # short circuit if we've already seen this activity id.
        # (don't do this for bare objects since we need to check further down
        # whether they've been updated since we saw them last.)
        if (obj.as1.get('objectType') == 'activity'
            and 'force' not in request.values
            and (not leased
                 or (obj.new is False and obj.changed is False))):
            error(f'Already seen this activity {id}', status=204)

        # drop bulky fields from the logged copy only
        pruned = {k: v for k, v in obj.as1.items()
                  if k not in ('contentMap', 'replies', 'signature')}
        delay = ''
        if (received_at and request.headers.get('X-AppEngine-TaskRetryCount') == '0'
                and obj.type != 'delete'):  # we delay deletes for 2m
            delay_s = int((util.now().replace(tzinfo=None)
                           - received_at.replace(tzinfo=None)
                           ).total_seconds())
            delay = f'({delay_s} s behind)'
        logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')

        # does this protocol support this activity/object type?
        from_cls.check_supported(obj)

        # check authorization
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
        actor = as1.get_owner(obj.as1)
        if not actor:
            error('Activity missing actor or author')
        elif from_cls.owns_id(actor) is False:
            error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)

        assert authed_as
        assert isinstance(authed_as, str)
        authed_as = normalize_user_id(id=authed_as, proto=from_cls)
        actor = normalize_user_id(id=actor, proto=from_cls)
        if actor != authed_as:
            report_error("Auth: receive: authed_as doesn't match owner",
                         user=f'{id} authed_as {authed_as} owner {actor}')
            error(f"actor {actor} isn't authed user {authed_as}")

        # update copy ids to originals
        obj.normalize_ids()
        obj.resolve_ids()

        if (obj.type == 'follow'
                and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
            # follows of bot user; refresh user profile first
            logger.info(f'Follow of bot user, reloading {actor}')
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
            from_user.reload_profile()
        else:
            # load actor user
            from_user = from_cls.get_or_create(id=actor, allow_opt_out=internal)

        if not internal and (not from_user
                             or from_user.manual_opt_out
                             # we want to override opt-out but not manual or blocked
                             or (from_user.status and from_user.status != 'opt-out')):
            error(f'Actor {actor} is opted out or blocked', status=204)

        # if this is an object, ie not an activity, wrap it in a create or update
        obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
        obj.add('users', from_user.key)

        inner_obj_as1 = as1.get_object(obj.as1)
        inner_obj_id = inner_obj_as1.get('id')
        if obj.type in as1.CRUD_VERBS | as1.VERBS_WITH_OBJECT:
            if not inner_obj_id:
                error(f'{obj.type} object has no id!')

        # check age. we support backdated posts, but if they're over 2w old, we
        # don't deliver them
        if obj.type == 'post':
            if published := inner_obj_as1.get('published'):
                try:
                    published_dt = util.parse_iso8601(published)
                    if not published_dt.tzinfo:
                        published_dt = published_dt.replace(tzinfo=timezone.utc)
                    age = util.now() - published_dt
                    if age > CREATE_MAX_AGE:
                        error(f'Ignoring, too old, {age} is over {CREATE_MAX_AGE}',
                              status=204)
                except ValueError:  # from parse_iso8601
                    logger.debug(f"Couldn't parse published {published}")

        # write Object to datastore
        obj.source_protocol = from_cls.LABEL
        if obj.type in STORE_AS1_TYPES:
            obj.put()

        # store inner object
        # TODO: unify with big obj.type conditional below. would have to merge
        # this with the DM handling block lower down.
        crud_obj = None
        if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
            crud_obj = Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
                                            source_protocol=from_cls.LABEL,
                                            authed_as=actor, users=[from_user.key])

        # from here on, actor is the AS1 actor object, not the id string
        actor = as1.get_object(obj.as1, 'actor')
        actor_id = actor.get('id')

        # handle activity!
        if obj.type == 'stop-following':
            # TODO: unify with handle_follow?
            # TODO: handle multiple followees
            if not actor_id or not inner_obj_id:
                error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')

            # deactivate Follower
            from_ = from_cls.key_for(actor_id)
            to_cls = Protocol.for_id(inner_obj_id)
            to = to_cls.key_for(inner_obj_id)
            follower = Follower.query(Follower.to == to,
                                      Follower.from_ == from_,
                                      Follower.status == 'active').get()
            if follower:
                logger.info(f'Marking {follower} inactive')
                follower.status = 'inactive'
                follower.put()
            else:
                logger.warning(f'No Follower found for {from_} => {to}')

            # fall through to deliver to followee
            # TODO: do we convert stop-following to webmention 410 of original
            # follow?

            # fall through to deliver to followers

        elif obj.type in ('delete', 'undo'):
            delete_obj_id = (from_user.profile_id()
                            if inner_obj_id == from_user.key.id()
                            else inner_obj_id)

            delete_obj = Object.get_by_id(delete_obj_id, authed_as=authed_as)
            if not delete_obj:
                logger.info(f"Ignoring, we don't have {delete_obj_id} stored")
                return 'OK', 204

            # TODO: just delete altogether!
            logger.info(f'Marking Object {delete_obj_id} deleted')
            delete_obj.deleted = True
            delete_obj.put()

            # if this is an actor, handle deleting it later so that
            # in case it's from_user, user.enabled_protocols is still populated
            #
            # fall through to deliver to followers and delete copy if necessary.
            # should happen via protocol-specific copy target and send of
            # delete activity.
            # https://github.com/snarfed/bridgy-fed/issues/63

        elif obj.type == 'block':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # blocking protocol bot user disables that protocol
                from_user.delete(proto)
                from_user.disable_protocol(proto)
                return 'OK', 200

        elif obj.type == 'post':
            # handle DMs to bot users
            if as1.is_dm(obj.as1):
                return dms.receive(from_user=from_user, obj=obj)

        # fetch actor if necessary
        if (actor and actor.keys() == set(['id'])
                and obj.type not in ('delete', 'undo')):
            logger.debug('Fetching actor so we have name, profile photo, etc')
            actor_obj = from_cls.load(ids.profile_id(id=actor['id'], proto=from_cls),
                                      raise_=False)
            if actor_obj and actor_obj.as1:
                obj.our_as1 = {
                    **obj.as1, 'actor': {
                        **actor_obj.as1,
                        # override profile id with actor id
                        # https://github.com/snarfed/bridgy-fed/issues/1720
                        'id': actor['id'],
                    }
                }

        # fetch object if necessary
        if (obj.type in ('post', 'update', 'share')
                and inner_obj_as1.keys() == set(['id'])
                and from_cls.owns_id(inner_obj_id)):
            logger.debug('Fetching inner object')
            inner_obj = from_cls.load(inner_obj_id, raise_=False,
                                      remote=(obj.type in ('post', 'update')))
            if obj.type in ('post', 'update'):
                crud_obj = inner_obj
            if inner_obj and inner_obj.as1:
                obj.our_as1 = {
                    **obj.as1,
                    'object': {
                        **inner_obj_as1,
                        **inner_obj.as1,
                    }
                }
            elif obj.type in ('post', 'update'):
                error(f"Need object {inner_obj_id} but couldn't fetch, giving up")

        if obj.type == 'follow':
            if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
                # follow of one of our protocol bot users; enable that protocol.
                # fall through so that we send an accept.
                from_user.enable_protocol(proto)
                proto.bot_follow(from_user)

            from_cls.handle_follow(obj)

        # deliver to targets
        resp = from_cls.deliver(obj, from_user=from_user, crud_obj=crud_obj)

        # if this is a user, deactivate its followers/followings
        # https://github.com/snarfed/bridgy-fed/issues/1304
        if obj.type == 'delete':
            if user_key := from_cls.key_for(id=inner_obj_id):
                if user := user_key.get():
                    for proto in user.enabled_protocols:
                        user.disable_protocol(PROTOCOLS[proto])

                    logger.info(f'Deactivating Followers from or to {user_key.id()}')
                    followers = Follower.query(
                        OR(Follower.to == user_key, Follower.from_ == user_key)
                        ).fetch()
                    for f in followers:
                        f.status = 'inactive'
                    ndb.put_multi(followers)

        # replace the short-lived lease with a long-lived "done" marker
        memcache.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
        return resp
1✔
1104

1105
    @classmethod
    def handle_follow(from_cls, obj):
        """Handles an incoming follow activity.

        Loads the follower and each followee, stores a :class:`Follower` for
        every cross-protocol followee, and sends an ``Accept`` back, but
        doesn't send the ``Follow`` itself. That happens in :meth:`deliver`.

        Followees on the same protocol as the follower, and followees whose
        user key can't be determined, are skipped. A missing actor, missing
        object(s), an unloadable follower, or a followee whose protocol can't
        be determined rejects the request via ``error()``.

        Args:
          obj (models.Object): follow activity. Its ``users`` and ``notify``
            properties are updated.
        """
        logger.debug('Got follow. Loading users, storing Follow(s), sending accept(s)')

        # Prepare follower (from) users' data
        # TODO: remove all of this and just use from_user
        from_as1 = as1.get_object(obj.as1, 'actor')
        from_id = from_as1.get('id')
        if not from_id:
            error(f'Follow activity requires actor. Got: {obj.as1}')

        from_obj = from_cls.load(from_id, raise_=False)
        if not from_obj:
            error(f"Couldn't load {from_id}", status=502)

        if not from_obj.as1:
            # fall back to the actor data embedded in the follow itself
            from_obj.our_as1 = from_as1
            from_obj.put()

        from_key = from_cls.key_for(from_id)
        if not from_key:
            error(f'Invalid {from_cls.LABEL} user key: {from_id}')
        obj.users = [from_key]
        from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

        # Prepare followee (to) users' data
        to_as1s = as1.get_objects(obj.as1)
        if not to_as1s:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        # Store Followers
        for to_as1 in to_as1s:
            to_id = to_as1.get('id')
            if not to_id:
                error(f'Follow activity requires object(s). Got: {obj.as1}')

            logger.info(f'Follow {from_id} => {to_id}')

            to_cls = Protocol.for_id(to_id)
            if not to_cls:
                error(f"Couldn't determine protocol for {to_id}")
            elif from_cls == to_cls:
                logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
                continue

            to_obj = to_cls.load(to_id)
            if to_obj and not to_obj.as1:
                to_obj.our_as1 = to_as1
                to_obj.put()

            to_key = to_cls.key_for(to_id)
            if not to_key:
                # it's the followee's key that's invalid here, so report the
                # followee's protocol and id
                logger.info(f'Skipping invalid {to_cls.LABEL} user key: {to_id}')
                continue

            to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj,
                                           allow_opt_out=True)
            Follower.get_or_create(to=to_user, from_=from_user,
                                   follow=obj.key, status='active')
            obj.add('notify', to_key)
            from_cls.maybe_accept_follow(follower=from_user, followee=to_user,
                                         follow=obj)
1175

1176
    @classmethod
    def maybe_accept_follow(_, follower, followee, follow):
        """Enqueues an accept activity in response to a follow.

        Only happens when ``accept`` is in the follower's
        ``SUPPORTED_AS1_TYPES``; otherwise this is a no-op.

        Args:
          follower: :class:`models.User`
          followee: :class:`models.User`
          follow: :class:`models.Object`
        """
        if 'accept' not in follower.SUPPORTED_AS1_TYPES:
            return

        dest = follower.target_for(follower.obj)
        if not dest:
            error(f"Couldn't find delivery target for follower {follower.key.id()}")

        # send accept. note that this is one accept for the whole
        # follow, even if it has multiple followees!
        followee_id = followee.key.id()
        accept_id = f'{followee_id}/followers#accept-{follow.key.id()}'
        accept_as1 = {
            'id': accept_id,
            'objectType': 'activity',
            'verb': 'accept',
            'actor': followee_id,
            'object': follow.as1,
        }
        common.create_task(
            queue='send',
            id=accept_id,
            our_as1=accept_as1,
            url=dest,
            protocol=follower.LABEL,
            user=followee.key.urlsafe(),
        )
1206

1207
    @classmethod
    def bot_follow(bot_cls, user):
        """Enqueues a follow of ``user`` from this protocol's bot user.

        ...so that the protocol starts sending us their activities, if it needs
        a follow for that (eg ActivityPub).

        Does nothing, other than logging, if ``user`` has no profile object.

        Args:
          user (User)
        """
        # imported here rather than at module level
        from web import Web

        bot = Web.get_by_id(bot_cls.bot_user_id())
        timestamp = util.now().isoformat()
        user_id = user.key.id()
        bot_id = bot.key.id()
        logger.info(f'Following {user_id} back from bot user {bot_id}')

        profile = user.obj
        if not profile:
            logger.info("  can't follow, user has no profile obj")
            return

        dest = user.target_for(profile)
        follow_id = f'https://{bot_id}/#follow-back-{user_id}-{timestamp}'
        follow_as1 = {
            'objectType': 'activity',
            'verb': 'follow',
            'id': follow_id,
            'actor': bot_id,
            'object': user_id,
        }
        common.create_task(
            queue='send',
            id=follow_id,
            our_as1=follow_as1,
            url=dest,
            source_protocol='web',
            protocol=user.LABEL,
            user=bot.key.urlsafe(),
        )
1239

1240
    @classmethod
    def handle_bare_object(cls, obj, authed_as=None):
        """Wraps a bare object in a synthetic create or update activity.

        Only actors, notes, articles, and comments are wrapped; anything else
        is returned as is. Changed objects and actor profiles become an
        ``update``; new objects (or any object when ``force`` is in the
        request form) become a ``post``. An unchanged object that's neither
        ends the request via ``error()`` with status 204.

        Args:
          obj (models.Object)
          authed_as (str): authenticated actor id who sent this activity

        Returns:
          models.Object: ``obj`` if it's an activity, otherwise a new object
        """
        is_actor = obj.type in as1.ACTOR_TYPES
        if not (is_actor or obj.type in ('note', 'article', 'comment')):
            return obj

        owner_id = ids.normalize_user_id(id=as1.get_owner(obj.as1), proto=cls)
        timestamp = util.now().isoformat()

        # occasionally we override the object, eg if this is a profile object
        # coming in via a user with use_instead set
        inner_as1 = obj.as1
        key_id = obj.key.id()
        as1_id = inner_as1.get('id') if key_id else None
        if key_id and as1_id and key_id != as1_id:
            logger.info(f'Overriding AS1 object id {as1_id} with Object id {key_id}')
            inner_as1['id'] = key_id

        # this is a raw post; wrap it in a create or update activity
        if obj.changed or is_actor:
            if obj.changed:
                logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')
            else:
                logger.info('Got actor profile object, wrapping in update')

            update_id = f'{obj.key.id()}#bridgy-fed-update-{timestamp}'
            # Mastodon requires the updated field for Updates, so
            # add a default value. the object's own value, if any, wins.
            # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
            # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
            # https://github.com/mastodon/documentation/pull/1150
            updated_inner = {'updated': timestamp, **inner_as1}
            update_as1 = {
                'objectType': 'activity',
                'verb': 'update',
                'id': update_id,
                'actor': owner_id,
                'object': updated_inner,
            }
            logger.debug(f'  AS1: {json_dumps(update_as1, indent=2)}')
            return Object(id=update_id, our_as1=update_as1,
                          source_protocol=obj.source_protocol)

        # HACK: force query param here is specific to webmention
        if not (obj.new or 'force' in request.form):
            error(f'{obj.key.id()} is unchanged, nothing to do', status=204)

        create_id = f'{obj.key.id()}#bridgy-fed-create'
        create_as1 = {
            'objectType': 'activity',
            'verb': 'post',
            'id': create_id,
            'actor': owner_id,
            'object': inner_as1,
            'published': timestamp,
        }
        logger.info('Wrapping in post')
        logger.debug(f'  AS1: {json_dumps(create_as1, indent=2)}')
        return Object(id=create_id, our_as1=create_as1,
                      source_protocol=obj.source_protocol)
1✔
1313

1314
    @classmethod
    def deliver(from_cls, obj, from_user, crud_obj=None, to_proto=None):
        """Enqueues a ``send`` task for each of an activity's recipients.

        Args:
          obj (models.Object): activity to deliver
          from_user (models.User): user (actor) this activity is from
          crud_obj (models.Object): if this is a create, update, or delete/undo
            activity, the inner object that's being written, otherwise None.
            (This object's ``notify`` and ``feed`` properties may be updated.)
          to_proto (protocol.Protocol): optional; if provided, only deliver to
            targets on this protocol

        Returns:
          (str, int) tuple: Flask response. 204 if there are no targets at
          all, otherwise 202.
        """
        if to_proto:
            logger.info(f'Only delivering to {to_proto.LABEL}')

        # find delivery targets. maps Target to Object or None
        #
        # ...then write the relevant object, since targets() has a side effect of
        # setting the notify and feed properties (and dirty attribute)
        targets = from_cls.targets(obj, from_user=from_user, crud_obj=crud_obj)
        if not targets:
            return r'No targets, nothing to do ¯\_(ツ)_/¯', 204

        stored_as_as1 = obj.type in STORE_AS1_TYPES

        # store object that targets() updated
        if crud_obj and crud_obj.dirty:
            crud_obj.put()
        elif stored_as_as1 and obj.dirty:
            obj.put()

        # stored activities are passed to the task by id, others inline
        if stored_as_as1:
            obj_params = {'obj_id': obj.key.id()}
        else:
            obj_params = obj.to_request()

        # sort targets so order is deterministic for tests, debugging, etc
        ordered = sorted(targets.items(), key=lambda pair: pair[0].uri)

        # enqueue send task for each targets
        logger.info(f'Delivering to: {[tgt for tgt, _ in ordered]}')
        user = from_user.key.urlsafe()
        for target, orig_obj in ordered:
            if to_proto and target.protocol != to_proto.LABEL:
                continue

            orig_obj_id = None
            if orig_obj:
                orig_obj_id = orig_obj.key.id()

            common.create_task(queue='send', url=target.uri,
                               protocol=target.protocol,
                               orig_obj_id=orig_obj_id, user=user, **obj_params)

        return 'OK', 202
1✔
1364

1365
    @classmethod
1✔
1366
    def targets(from_cls, obj, from_user, crud_obj=None, internal=False):
1✔
1367
        """Collects the targets to send a :class:`models.Object` to.
1368

1369
        Targets are both objects - original posts, events, etc - and actors.
1370

1371
        Args:
1372
          obj (models.Object)
1373
          from_user (User)
1374
          crud_obj (models.Object): if this is a create, update, or delete/undo
1375
            activity, the inner object that's being written, otherwise None.
1376
            (This object's ``notify`` and ``feed`` properties may be updated.)
1377
          internal (bool): whether this is a recursive internal call
1378

1379
        Returns:
1380
          dict: maps :class:`models.Target` to original (in response to)
1381
          :class:`models.Object`, if any, otherwise None
1382
        """
1383
        logger.debug('Finding recipients and their targets')
1✔
1384

1385
        # we should only have crud_obj iff this is a create or update
1386
        assert (crud_obj is not None) == (obj.type in ('post', 'update')), obj.type
1✔
1387
        write_obj = crud_obj or obj
1✔
1388
        write_obj.dirty = False
1✔
1389

1390
        target_uris = sorted(set(as1.targets(obj.as1)))
1✔
1391
        logger.info(f'Raw targets: {target_uris}')
1✔
1392
        orig_obj = None
1✔
1393
        targets = {}  # maps Target to Object or None
1✔
1394
        owner = as1.get_owner(obj.as1)
1✔
1395
        allow_opt_out = (obj.type == 'delete')
1✔
1396
        inner_obj_as1 = as1.get_object(obj.as1)
1✔
1397
        inner_obj_id = inner_obj_as1.get('id')
1✔
1398
        in_reply_tos = as1.get_ids(inner_obj_as1, 'inReplyTo')
1✔
1399
        is_reply = obj.type == 'comment' or in_reply_tos
1✔
1400
        is_self_reply = False
1✔
1401

1402
        if is_reply:
1✔
1403
            original_ids = in_reply_tos
1✔
1404
        else:
1405
            if inner_obj_id == from_user.key.id():
1✔
1406
                inner_obj_id = from_user.profile_id()
1✔
1407
            original_ids = [inner_obj_id]
1✔
1408

1409
        # which protocols should we allow delivering to?
1410
        to_protocols = []
1✔
1411
        for label in (list(from_user.DEFAULT_ENABLED_PROTOCOLS)
1✔
1412
                      + from_user.enabled_protocols):
1413
            proto = PROTOCOLS[label]
1✔
1414
            if proto.HAS_COPIES and (obj.type in ('update', 'delete', 'share', 'undo')
1✔
1415
                                     or is_reply):
1416
                for id in original_ids:
1✔
1417
                    if Protocol.for_id(id) == proto:
1✔
1418
                        logger.info(f'Allowing {label} for original post {id}')
1✔
1419
                        break
1✔
1420
                    elif orig := from_user.load(id, remote=False):
1✔
1421
                        if orig.get_copy(proto):
1✔
1422
                            logger.info(f'Allowing {label}, original post {id} was bridged there')
1✔
1423
                            break
1✔
1424
                else:
1425
                    logger.info(f"Skipping {label}, original objects {original_ids} weren't bridged there")
1✔
1426
                    continue
1✔
1427

1428
            util.add(to_protocols, proto)
1✔
1429

1430
        # process direct targets
1431
        for id in sorted(target_uris):
1✔
1432
            target_proto = Protocol.for_id(id)
1✔
1433
            if not target_proto:
1✔
1434
                logger.info(f"Can't determine protocol for {id}")
1✔
1435
                continue
1✔
1436
            elif target_proto.is_blocklisted(id):
1✔
1437
                logger.debug(f'{id} is blocklisted')
1✔
1438
                continue
1✔
1439

1440
            orig_obj = target_proto.load(id, raise_=False)
1✔
1441
            if not orig_obj or not orig_obj.as1:
1✔
1442
                logger.info(f"Couldn't load {id}")
1✔
1443
                continue
1✔
1444

1445
            target_author_key = target_proto.actor_key(orig_obj)
1✔
1446
            if not from_user.is_enabled(target_proto):
1✔
1447
                # if author isn't bridged and inReplyTo author is, DM a prompt
1448
                if id in in_reply_tos:
1✔
1449
                    if target_author := target_author_key.get():
1✔
1450
                        if target_author.is_enabled(from_cls):
1✔
1451
                            dms.maybe_send(
1✔
1452
                                from_proto=target_proto, to_user=from_user,
1453
                                type='replied_to_bridged_user', text=f"""\
1454
Hi! You <a href="{inner_obj_as1.get('url') or inner_obj_id}">recently replied</a> to {orig_obj.actor_link(image=False)}, who's bridged here from {target_proto.PHRASE}. If you want them to see your replies, you can bridge your account into {target_proto.PHRASE} by following this account. <a href="https://fed.brid.gy/docs">See the docs</a> for more information.""")
1455

1456
                continue
1✔
1457

1458
            # deliver self-replies to followers
1459
            # https://github.com/snarfed/bridgy-fed/issues/639
1460
            if id in in_reply_tos and owner == as1.get_owner(orig_obj.as1):
1✔
1461
                is_self_reply = True
1✔
1462
                logger.info(f'self reply!')
1✔
1463

1464
            # also add copies' targets
1465
            for copy in orig_obj.copies:
1✔
1466
                proto = PROTOCOLS[copy.protocol]
1✔
1467
                if proto in to_protocols:
1✔
1468
                    # copies generally won't have their own Objects
1469
                    if target := proto.target_for(Object(id=copy.uri)):
1✔
1470
                        logger.debug(f'Adding target {target} for copy {copy.uri} of original {id}')
1✔
1471
                        targets[Target(protocol=copy.protocol, uri=target)] = orig_obj
1✔
1472

1473
            if target_proto == from_cls:
1✔
1474
                logger.debug(f'Skipping same-protocol target {id}')
1✔
1475
                continue
1✔
1476

1477
            target = target_proto.target_for(orig_obj)
1✔
1478
            if not target:
1✔
1479
                # TODO: surface errors like this somehow?
1480
                logger.error(f"Can't find delivery target for {id}")
×
1481
                continue
×
1482

1483
            logger.debug(f'Target for {id} is {target}')
1✔
1484
            # only use orig_obj for inReplyTos, like/repost objects, etc
1485
            # https://github.com/snarfed/bridgy-fed/issues/1237
1486
            targets[Target(protocol=target_proto.LABEL, uri=target)] = (
1✔
1487
                orig_obj if id in in_reply_tos or id in as1.get_ids(obj.as1, 'object')
1488
                else None)
1489

1490
            if target_author_key:
1✔
1491
                logger.debug(f'Recipient is {target_author_key}')
1✔
1492
                if write_obj.add('notify', target_author_key):
1✔
1493
                    write_obj.dirty = True
1✔
1494

1495
        if obj.type == 'undo':
1✔
1496
            logger.debug('Object is an undo; adding targets for inner object')
1✔
1497
            if set(inner_obj_as1.keys()) == {'id'}:
1✔
1498
                inner_obj = from_cls.load(inner_obj_id, raise_=False)
1✔
1499
            else:
1500
                inner_obj = Object(id=inner_obj_id, our_as1=inner_obj_as1)
1✔
1501
            if inner_obj:
1✔
1502
                targets.update(from_cls.targets(inner_obj, from_user=from_user,
1✔
1503
                                                internal=True))
1504

1505
        logger.info(f'Direct targets: {[t.uri for t in targets.keys()]}')
1✔
1506

1507
        # deliver to followers, if appropriate
1508
        user_key = from_cls.actor_key(obj, allow_opt_out=allow_opt_out)
1✔
1509
        if not user_key:
1✔
1510
            logger.info("Can't tell who this is from! Skipping followers.")
1✔
1511
            return targets
1✔
1512

1513
        followers = []
1✔
1514
        if (obj.type in ('post', 'update', 'delete', 'share', 'undo')
1✔
1515
                and (not is_reply or is_self_reply)):
1516
            logger.info(f'Delivering to followers of {user_key}')
1✔
1517
            followers = [
1✔
1518
                f for f in Follower.query(Follower.to == user_key,
1519
                                          Follower.status == 'active')
1520
                # skip protocol bot users
1521
                if not Protocol.for_bridgy_subdomain(f.from_.id())
1522
                # skip protocols this user hasn't enabled, or where the base
1523
                # object of this activity hasn't been bridged
1524
                and PROTOCOLS_BY_KIND[f.from_.kind()] in to_protocols]
1525
            user_keys = [f.from_ for f in followers]
1✔
1526
            users = [u for u in ndb.get_multi(user_keys) if u]
1✔
1527
            User.load_multi(users)
1✔
1528

1529
            if (not followers and
1✔
1530
                (util.domain_or_parent_in(
1531
                    util.domain_from_link(from_user.key.id()), LIMITED_DOMAINS)
1532
                 or util.domain_or_parent_in(
1533
                     util.domain_from_link(obj.key.id()), LIMITED_DOMAINS))):
1534
                logger.info(f'skipping, {from_user.key.id()} is on a limited domain and has no followers')
1✔
1535
                return {}
1✔
1536

1537
            # add to followers' feeds, if any
1538
            if not internal and obj.type in ('post', 'update', 'share'):
1✔
1539
                if write_obj.type not in as1.ACTOR_TYPES:
1✔
1540
                    write_obj.feed = [u.key for u in users]
1✔
1541
                    if write_obj.feed:
1✔
1542
                        write_obj.dirty = True
1✔
1543

1544
            # collect targets for followers
1545
            for user in users:
1✔
1546
                # TODO: should we pass remote=False through here to Protocol.load?
1547
                target = user.target_for(user.obj, shared=True) if user.obj else None
1✔
1548
                if not target:
1✔
1549
                    # TODO: surface errors like this somehow?
1550
                    logger.error(f'Follower {user.key} has no delivery target')
1✔
1551
                    continue
1✔
1552

1553
                # normalize URL (lower case hostname, etc)
1554
                # ...but preserve our PDS URL without trailing slash in path
1555
                # https://atproto.com/specs/did#did-documents
1556
                target = util.dedupe_urls([target], trailing_slash=False)[0]
1✔
1557

1558
                targets[Target(protocol=user.LABEL, uri=target)] = \
1✔
1559
                    Object.get_by_id(inner_obj_id) if obj.type == 'share' else None
1560

1561
        # deliver to enabled HAS_COPIES protocols proactively
1562
        # TODO: abstract for other protocols
1563
        from atproto import ATProto
1✔
1564
        if (ATProto in to_protocols
1✔
1565
                and obj.type in ('post', 'update', 'delete', 'share')):
1566
            logger.info(f'user has ATProto enabled, adding {ATProto.PDS_URL}')
1✔
1567
            targets.setdefault(
1✔
1568
                Target(protocol=ATProto.LABEL, uri=ATProto.PDS_URL), None)
1569

1570
        # de-dupe targets, discard same-domain
1571
        # maps string target URL to (Target, Object) tuple
1572
        candidates = {t.uri: (t, obj) for t, obj in targets.items()}
1✔
1573
        # maps Target to Object or None
1574
        targets = {}
1✔
1575
        source_domains = [
1✔
1576
            util.domain_from_link(url) for url in
1577
            (obj.as1.get('id'), obj.as1.get('url'), as1.get_owner(obj.as1))
1578
            if util.is_web(url)
1579
        ]
1580
        for url in sorted(util.dedupe_urls(
1✔
1581
                candidates.keys(),
1582
                # preserve our PDS URL without trailing slash in path
1583
                # https://atproto.com/specs/did#did-documents
1584
                trailing_slash=False)):
1585
            if util.is_web(url) and util.domain_from_link(url) in source_domains:
1✔
1586
                logger.info(f'Skipping same-domain target {url}')
×
1587
                continue
×
1588
            target, obj = candidates[url]
1✔
1589
            targets[target] = obj
1✔
1590

1591
        return targets
1✔
1592

1593
    @classmethod
    def load(cls, id, remote=None, local=True, raise_=True, **kwargs):
        """Returns the :class:`models.Object` for ``id``, loading or fetching it.

        Looks in the datastore first (if ``local``), then optionally fetches
        via :meth:`fetch` and stores the result. Sets the :attr:`new` and
        :attr:`changed` attributes on the returned object when they can be
        determined, ie when ``local`` is True and a fetch happens.

        Args:
          id (str)
          remote (bool): whether to fetch the object over the network. True
            always fetches, even if the object is already stored, and updates
            the stored copy. False never fetches; if the object isn't stored,
            returns None. None (the default) fetches only if the object isn't
            stored or its stored copy is older than ``OBJECT_REFRESH_AGE``.
          local (bool): whether to try the datastore before fetching. If
            False, a successful fetch is still written back to the datastore.
          raise_ (bool): if False, any :class:`requests.RequestException` or
            :class:`werkzeug.exceptions.HTTPException` raised by
            :meth:`fetch` is caught and ``None`` is returned instead
          kwargs: passed through to :meth:`fetch`

        Returns:
          models.Object: the loaded object, or None if ``remote`` is False and
          it isn't in the datastore, :meth:`fetch` returned a falsy value, the
          fetch failed and ``raise_`` is False, or the fetched entity
          serializes to more than ``models.MAX_ENTITY_SIZE`` bytes

        Raises:
          requests.RequestException, werkzeug.exceptions.HTTPException:
            re-raised from :meth:`fetch` if ``raise_`` is True
        """
        assert id
        assert local or remote is not False

        # datastore lookup, if requested
        obj = Object.get_by_id(id) if local else None
        if obj and (obj.as1 or obj.raw or obj.deleted):
            obj.new = False

        if remote is False:
            return obj

        if remote is None and obj:
            # only re-fetch a stored object once it's old enough
            is_stale = obj.updated < util.as_utc(util.now() - OBJECT_REFRESH_AGE)
            if not is_stale:
                return obj

        # from here on, we're fetching
        prior_as1 = None
        if obj:
            # remember the stored AS1 so we can tell below whether it changed
            prior_as1 = obj.as1
            obj.our_as1 = None
            obj.new = False
        else:
            obj = Object(id=id)
            if local:
                # we looked in the datastore and it wasn't there
                obj.new = True
                obj.changed = False

        try:
            fetched = cls.fetch(obj, **kwargs)
        except (RequestException, HTTPException) as err:
            if raise_:
                raise
            util.interpret_http_exception(err)
            return None

        if not fetched:
            return None

        # measure the serialized entity before trying to store it
        # https://stackoverflow.com/a/3042250/186123
        serialized = _entity_to_protobuf(obj)._pb.SerializeToString()
        size = len(serialized)
        if size > models.MAX_ENTITY_SIZE:
            logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
            return None

        obj.resolve_ids()
        obj.normalize_ids()

        if obj.new is False:
            obj.changed = obj.activity_changed(prior_as1)

        if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
            if obj.source_protocol:
                logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
            obj.source_protocol = cls.LABEL

        obj.put()
        return obj
1✔
1688

1689
    @classmethod
    def check_supported(cls, obj):
        """Reports an HTTP 204 error if this protocol can't handle ``obj``.

        Runs three checks, in order: the activity type (and, for CRUD verbs,
        the inner object's type) must be in ``SUPPORTED_AS1_TYPES``; posts
        must have either text content, an image, or an attachment with a
        stream; and DMs are only allowed if this protocol supports them and
        either the recipient or the sender is a protocol bot account.

        Objects with no type are not checked at all.

        (This logic is duplicated in some protocols, eg ActivityPub, so that
        they can short circuit out early. It generally uses their native formats
        instead of AS1, before an :class:`models.Object` is created.)

        Args:
          obj (Object)

        Raises:
          werkzeug.HTTPException: if this protocol doesn't support this object
        """
        if not obj.type:
            return

        # type of the activity itself, and of its inner object for CRUD verbs
        inner_type = as1.object_type(as1.get_object(obj.as1)) or ''
        verb_unsupported = obj.type not in cls.SUPPORTED_AS1_TYPES
        inner_unsupported = (obj.type in as1.CRUD_VERBS
                             and inner_type
                             and inner_type not in cls.SUPPORTED_AS1_TYPES)
        if verb_unsupported or inner_unsupported:
            error(f"Bridgy Fed for {cls.LABEL} doesn't support {obj.type} {inner_type} yet", status=204)

        # don't allow posts with blank content and no image/video/audio
        if obj.type in ('post', 'update'):
            crud_obj = as1.get_object(obj.as1)
        else:
            crud_obj = obj.as1

        if crud_obj.get('objectType') in as1.POST_TYPES:
            has_media = (
                util.get_url(crud_obj, key='image')
                or any(util.get_urls(crud_obj, 'attachments', inner_key='stream')))
            # TODO: handle articles with displayName but not content
            if (not has_media
                    and not source.html_to_text(crud_obj.get('content')).strip()):
                error('Blank content and no image or video or audio', status=204)

        # DMs are only allowed to/from protocol bot accounts
        recip = as1.recipient_if_dm(obj.as1)
        if not recip:
            return

        protocol_user_ids = PROTOCOL_DOMAINS + common.protocol_user_copy_ids()
        dm_allowed = (cls.SUPPORTS_DMS
                      and (recip in protocol_user_ids
                           or as1.get_owner(obj.as1) in protocol_user_ids))
        if not dm_allowed:
            error(f"Bridgy Fed doesn't support DMs", status=204)
1✔
1732

1733

1734
@cloud_tasks_only(log=None)
def receive_task():
    """Task handler for a newly received :class:`models.Object`.

    Builds the object from the request and hands it to the
    :meth:`Protocol.receive` of the object's source protocol.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`. If it's our primary
        domain or one of the protocol domains, the activity is treated as
        internal.
      obj_id (str): key id of :class:`models.Object` to handle
      received_at (str, ISO 8601 timestamp): when we first saw (received)
        this activity
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    A :class:`requests.RequestException` or :class:`ValueError` raised by
    :meth:`Protocol.receive` is reported via ``error`` with status 304.

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    common.log_request()
    form = request.form.to_dict()

    authed_as = form.pop('authed_as', None)
    internal = (authed_as == common.PRIMARY_DOMAIN
                or authed_as in common.PROTOCOL_DOMAINS)

    obj = Object.from_request()
    assert obj
    assert obj.source_protocol
    obj.new = True

    # parse the timestamp if we got one; otherwise pass the raw value through
    raw_received_at = form.pop('received_at', None)
    if raw_received_at:
        received_at = datetime.fromisoformat(raw_received_at)
    else:
        received_at = raw_received_at

    source_cls = PROTOCOLS[obj.source_protocol]
    try:
        return source_cls.receive(
            obj=obj, authed_as=authed_as, internal=internal, received_at=received_at)
    except (RequestException, ValueError) as err:
        # check RequestException first: some of its subclasses are also
        # ValueErrors, and they should get the HTTP interpretation
        if isinstance(err, RequestException):
            util.interpret_http_exception(err)
        else:
            logger.warning(err, exc_info=True)
        error(err, status=304)
×
1778

1779

1780
@cloud_tasks_only(log=None)
def send_task():
    """Task handler for sending an activity to a single specific destination.

    Checks that the destination protocol supports the object, then calls
    that protocol's :meth:`Protocol.send`.

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj_id (str): key id of :class:`models.Object` to send
      orig_obj_id (str): optional, :class:`models.Object` key id of the
        "original object" that this object refers to, eg replies to or reposts
        or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from
      *: If ``obj_id`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns:
      (str, int) tuple: empty body and HTTP status code. 200 if
      :meth:`Protocol.send` returned a truthy value, 204 if it returned False
      or if ``protocol`` or ``url`` is missing, 304 otherwise, eg if it
      returned None or raised an exception that was interpreted as an HTTP
      error.
    """
    common.log_request()

    # prepare
    form = request.form.to_dict()
    url = form.get('url')
    protocol = form.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    # NOTE(review): target isn't referenced again below; constructing it may
    # still validate url and protocol, so it's kept. TODO confirm
    target = Target(uri=url, protocol=protocol)
    obj = Object.from_request()
    assert obj and obj.key and obj.key.id()

    to_cls = PROTOCOLS[protocol]
    to_cls.check_supported(obj)
    allow_opt_out = obj.type == 'delete'

    user = None
    user_key = form.get('user')
    if user_key:
        key = ndb.Key(urlsafe=user_key)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[key.kind()]
        user = user_cls.get_by_id(key.id(), allow_opt_out=allow_opt_out)

    # send
    delay = ''
    is_first_attempt = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if is_first_attempt and obj.created:
        behind = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(behind.total_seconds())
        delay = f'({delay_s} s behind)'

    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'  AS1: {json_dumps(obj.as1, indent=2)}')

    orig_obj_id = form.get('orig_obj_id')
    sent = None
    try:
        sent = to_cls.send(obj, url, from_user=user, orig_obj_id=orig_obj_id)
    except BaseException as err:
        # swallow only exceptions that can be interpreted as HTTP errors
        code, body = util.interpret_http_exception(err)
        if not (code or body):
            raise

    if sent:
        status = 200
    elif sent is False:
        logger.info(f'Failed sending!')
        status = 204
    else:
        status = 304

    return '', status
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc