• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 45c3fb42-19b0-469f-a9ad-a82dec7dff4e

24 Oct 2025 07:11PM UTC coverage: 92.884% (-0.02%) from 92.908%
45c3fb42-19b0-469f-a9ad-a82dec7dff4e

push

circleci

snarfed
temporarily disable ATProto blob refetching

for #2163

5991 of 6450 relevant lines covered (92.88%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.74
/models.py
1
"""Datastore model classes."""
2
import copy
1✔
3
from datetime import timedelta, timezone
1✔
4
from functools import lru_cache
1✔
5
import itertools
1✔
6
import json
1✔
7
import logging
1✔
8
import random
1✔
9
import re
1✔
10
from threading import Lock
1✔
11
from urllib.parse import quote, urlparse
1✔
12

13
from arroba.util import parse_at_uri
1✔
14
import cachetools
1✔
15
from Crypto.PublicKey import RSA
1✔
16
from flask import request
1✔
17
from google.cloud import ndb
1✔
18
from google.cloud.ndb.key import _MAX_KEYPART_BYTES
1✔
19
from granary import as1, as2, atom, bluesky, microformats2
1✔
20
from granary.bluesky import AT_URI_PATTERN, BSKY_APP_URL_RE
1✔
21
import granary.nostr
1✔
22
from granary.source import html_to_text
1✔
23
import humanize
1✔
24
from oauth_dropins.webutil import util
1✔
25
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
26
from oauth_dropins.webutil.flask_util import error
1✔
27
from oauth_dropins.webutil.models import EncryptedProperty, JsonProperty, StringIdModel
1✔
28
from oauth_dropins.webutil.util import ellipsize, json_dumps, json_loads
1✔
29
from requests import RequestException
1✔
30
import secp256k1
1✔
31

32
import common
1✔
33
from common import (
1✔
34
    base64_to_long,
35
    DOMAIN_RE,
36
    long_to_base64,
37
    OLD_ACCOUNT_AGE,
38
    PROTOCOL_DOMAINS,
39
    report_error,
40
    unwrap,
41
)
42
import ids
1✔
43
import memcache
1✔
44

45
# Maps each string label to its Protocol subclass. The keys are fixed here; the
# values start out as None and are populated by ProtocolUserMeta.
# (we used to wait for ProtocolUserMeta to populate the keys as well, but that was
# awkward to use in datastore model properties with choices, below; it required
# overriding them in reset_model_properties, which was always flaky.)
PROTOCOLS = dict.fromkeys((
    'activitypub',
    'ap',
    'atproto',
    'bsky',
    'nostr',
    'ostatus',
    'web',
    'webmention',
    'ui',
))
# Extra labels that are only registered when DEBUG is on.
DEBUG_PROTOCOLS = (
    'fa',
    'fake',
    'efake',
    'other',
)
if DEBUG:
    PROTOCOLS.update(dict.fromkeys(DEBUG_PROTOCOLS))

# Maps string kind (eg 'MagicKey') to Protocol subclass.
# Populated in ProtocolUserMeta.
PROTOCOLS_BY_KIND = {}

# 2048 bits makes tests slow, so use 1024 for them
KEY_BITS = 1024 if DEBUG else 2048
PAGE_SIZE = 20

# Auto delete most old objects via the Object.expire property
# https://cloud.google.com/datastore/docs/ttl
#
# We need to keep follows because we attach them to Followers and use them for
# unfollows.
DONT_EXPIRE_OBJECT_TYPES = (
    as1.ACTOR_TYPES
    | as1.POST_TYPES
    | {'block', 'flag', 'follow', 'like', 'share'}
)
OBJECT_EXPIRE_AGE = timedelta(days=90)

GET_ORIGINALS_CACHE_EXPIRATION = timedelta(days=1)
FOLLOWERS_CACHE_EXPIRATION = timedelta(hours=2)

# See https://www.cloudimage.io/
IMAGE_PROXY_URL_BASE = 'https://aujtzahimq.cloudimg.io/v7/'
IMAGE_PROXY_DOMAINS = ('threads.net',)

# Human-readable explanation for each User.status value.
# Keep in sync with DM.type!
USER_STATUS_DESCRIPTIONS = {
    'moved': 'account has migrated to another account',
    'no-feed-or-webmention': "web site doesn't have an RSS or Atom feed or webmention endpoint",
    'nobot': "profile has 'nobot' in it",
    'nobridge': "profile has 'nobridge' in it",
    'no-nip05': "account's NIP-05 identifier is missing or invalid",
    'no-profile': 'profile is missing or empty',
    'opt-out': 'account or instance has requested to be opted out',
    'owns-webfinger': 'web site looks like a fediverse instance because it already serves Webfinger',
    'private': 'account is set as private or protected',
    'requires-avatar': "account doesn't have a profile picture",
    'requires-name': "account's name and username are the same",
    'requires-old-account': f"account is less than {humanize.naturaldelta(OLD_ACCOUNT_AGE)} old",
    'unsupported-handle-ap': "<a href='https://fed.brid.gy/docs#fediverse-get-started'>username has characters that Bridgy Fed doesn't currently support</a>",
}

logger = logging.getLogger(__name__)
1✔
110

111

112
class Target(ndb.Model):
    r""":class:`protocol.Protocol` + URI pairs for identifying objects.

    These are currently used for:

    * delivery destinations, eg ActivityPub inboxes, webmention targets, etc.
    * copies of :class:`Object`\s and :class:`User`\s elsewhere,
      eg ``at://`` URIs for ATProto records, nevent etc bech32-encoded Nostr ids,
      ATProto user DIDs, etc.

    Used in :class:`google.cloud.ndb.model.StructuredProperty`\s inside
    :class:`Object` and :class:`User`; not stored as top-level entities in the
    datastore.

    ndb implements this by hoisting each property here into a corresponding
    property on the parent entity, prefixed by the StructuredProperty name
    below, eg ``delivered.uri``, ``delivered.protocol``, etc.

    For repeated StructuredPropertys, the hoisted properties are all repeated on
    the parent entity, and reconstructed into StructuredPropertys based on their
    order.

    https://googleapis.dev/python/python-ndb/latest/model.html#google.cloud.ndb.model.StructuredProperty
    """
    uri = ndb.StringProperty(required=True)
    ''
    protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()), required=True)
    ''

    def __eq__(self, other):
        """Equality excludes Targets' :class:`Key`.

        Two Targets are equal when both ``uri`` and ``protocol`` match.

        Returns:
          bool, or ``NotImplemented`` if ``other`` isn't a :class:`Target`, so
          that Python falls back to its default comparison (which evaluates to
          False for ``==``) instead of this method silently returning None.
        """
        if not isinstance(other, Target):
            return NotImplemented

        return self.uri == other.uri and self.protocol == other.protocol

    def __hash__(self):
        """Allow hashing so these can be dict keys.

        Hashes the same fields that :meth:`__eq__` compares.
        """
        return hash((self.protocol, self.uri))
1✔
149

150

151
class DM(ndb.Model):
    """:class:`protocol.Protocol` + type pairs for identifying sent DMs.

    Used in :attr:`User.sent_dms`.

    https://googleapis.dev/python/python-ndb/latest/model.html#google.cloud.ndb.model.StructuredProperty
    """
    type = ndb.StringProperty(required=True)
    """Known values (keep in sync with USER_STATUS_DESCRIPTIONS, the subset for
    ineligible users):

      * dms_not_supported-[RECIPIENT-USER-ID]
      * moved
      * no-feed-or-webmention
      * no-nip05
      * no-profile
      * opt-out
      * owns-webfinger
      * private
      * replied_to_bridged_user
      * request_bridging
      * requires-avatar
      * requires-name
      * requires-old-account
      * unsupported-handle-ap
      * welcome
    """
    protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()), required=True)
    ''

    def __eq__(self, other):
        """Equality excludes DMs' :class:`Key`.

        Two DMs are equal when both ``type`` and ``protocol`` match.

        Returns:
          bool, or ``NotImplemented`` if ``other`` isn't a :class:`DM`.
          Without this guard, comparing against None or any other object
          raised :class:`AttributeError`.
        """
        if not isinstance(other, DM):
            return NotImplemented

        return self.type == other.type and self.protocol == other.protocol
1✔
184

185

186
class ProtocolUserMeta(type(ndb.Model)):
    """:class:`User` metaclass. Registers all subclasses in ``PROTOCOLS``.

    A new class is registered under its ``LABEL``, ``ABBREV``, and each of its
    ``OTHER_LABELS``, and under its datastore kind in ``PROTOCOLS_BY_KIND``.
    Classes without a ``LABEL``, the ``protocol`` and ``user`` base labels, and
    debug-only protocols when ``DEBUG`` is off are all left unregistered.
    """
    def __new__(meta, name, bases, class_dict):
        cls = super().__new__(meta, name, bases, class_dict)

        label = getattr(cls, 'LABEL', None)
        if not label or label in ('protocol', 'user'):
            return cls
        if not DEBUG and label in DEBUG_PROTOCOLS:
            return cls

        for alias in (label, cls.ABBREV) + cls.OTHER_LABELS:
            if alias:  # ABBREV etc may be unset
                PROTOCOLS[alias] = cls
        PROTOCOLS_BY_KIND[cls._get_kind()] = cls

        return cls
1✔
200

201

202
def reset_protocol_properties():
    """Rebuilds protocol-derived module globals from the current ``PROTOCOLS``.

    Specifically, recompiles ``common.SUBDOMAIN_BASE_URL_RE`` so that it
    matches every current protocol label (plus ``fed``), and recomputes
    ``ids.COPIES_PROTOCOLS`` as the labels whose protocol has ``HAS_COPIES``.
    """
    labels = "|".join(PROTOCOLS)
    abbrevs = f'({labels}|fed)'
    pattern = rf'^https?://({abbrevs}\.brid\.gy|localhost(:8080)?)/(convert/|r/)?({abbrevs}/)?(?P<path>.+)'
    common.SUBDOMAIN_BASE_URL_RE = re.compile(pattern)

    # labels whose value is still None, ie not yet registered, are skipped
    ids.COPIES_PROTOCOLS = tuple(
        label for label, proto in PROTOCOLS.items()
        if proto and proto.HAS_COPIES
    )
209

210

211
@lru_cache(maxsize=100000)
@memcache.memoize(expire=GET_ORIGINALS_CACHE_EXPIRATION)
def get_original_object_key(copy_id):
    """Looks up the :class:`Object` that has a given copy id, if any.

    Queries on ``Object.copies.uri``. Results are cached both in process, via
    ``lru_cache``, and via :func:`memcache.memoize`.

    Note that :meth:`Object.add` also updates this function's
    :func:`memcache.memoize` cache.

    Args:
      copy_id (str): must be non-empty

    Returns:
      google.cloud.ndb.Key or None
    """
    assert copy_id

    matching = Object.query(Object.copies.uri == copy_id)
    return matching.get(keys_only=True)
1✔
228

229

230
@lru_cache(maxsize=100000)
@memcache.memoize(expire=GET_ORIGINALS_CACHE_EXPIRATION)
def get_original_user_key(copy_id):
    """Looks up the user that has a given copy id, if any.

    Tries each registered protocol in ``PROTOCOLS`` in turn, skipping ``ui``
    and any protocol that owns ``copy_id`` itself, and returns the first match
    on that protocol's ``copies.uri``.

    Note that :meth:`User.add` also updates this function's
    :func:`memcache.memoize` cache.

    Args:
      copy_id (str): must be non-empty

    Returns:
      google.cloud.ndb.Key or None
    """
    assert copy_id

    for proto in PROTOCOLS.values():
        # skip unregistered labels, the UI pseudo-protocol, and the protocol
        # that this id natively belongs to
        if not proto or proto.LABEL == 'ui' or proto.owns_id(copy_id):
            continue

        found = proto.query(proto.copies.uri == copy_id).get(keys_only=True)
        if found:
            return found

    return None
1✔
250

251

252
class AddRemoveMixin:
    """Mixin class that defines the :meth:`add` and :meth:`remove` methods.

    If a subclass of this mixin defines the ``GET_ORIGINAL_FN`` class-level
    attribute, that function's memoize cache is updated when :meth:`add` adds a
    new value to the ``copies`` property, and cleared when :meth:`remove` is
    called with the ``copies`` property. Subclasses that don't define
    ``GET_ORIGINAL_FN`` skip the cache handling entirely.
    """

    lock = None
    """Synchronizes :meth:`add`, :meth:`remove`, etc."""

    def __init__(self, *args, **kwargs):
        """Passes all arguments through and creates this instance's lock."""
        super().__init__(*args, **kwargs)
        self.lock = Lock()

    def add(self, prop, val):
        """Adds a value to a multiply-valued property.

        Args:
          prop (str)
          val

        Returns:
          True if val was added, ie it wasn't already in prop, False otherwise
        """
        with self.lock:
            added = util.add(getattr(self, prop), val)

        if prop == 'copies' and added:
            # GET_ORIGINAL_FN is optional, so default to None instead of raising
            if fn := getattr(self, 'GET_ORIGINAL_FN', None):
                memcache.pickle_memcache.set(memcache.memoize_key(fn, val.uri),
                                             self.key)

        return added

    def remove(self, prop, val):
        """Removes a value from a multiply-valued property.

        No-op on the property itself if ``val`` isn't in it. When ``prop`` is
        ``copies``, the get-original cache entry for ``val.uri`` is cleared
        either way.

        Args:
          prop (str)
          val
        """
        with self.lock:
            existing = getattr(self, prop)
            if val in existing:
                existing.remove(val)

        if prop == 'copies':
            self.clear_get_original_cache(val.uri)

    def remove_copies_on(self, proto):
        """Removes all copies on a given protocol.

        ``proto.HAS_COPIES`` must be True.

        Args:
          proto (protocol.Protocol subclass)
        """
        assert proto.HAS_COPIES

        # iterate over a snapshot: remove() mutates self.copies in place, and
        # mutating a list while iterating it skips the element after each
        # removal, which left adjacent copies on the same protocol behind
        for copy in list(self.copies):
            if copy.protocol in (proto.ABBREV, proto.LABEL):
                self.remove('copies', copy)

    @classmethod
    def clear_get_original_cache(cls, uri):
        """Deletes the memoized ``GET_ORIGINAL_FN`` result for a copy URI.

        No-op if this class doesn't define ``GET_ORIGINAL_FN``.

        Args:
          uri (str)
        """
        if fn := getattr(cls, 'GET_ORIGINAL_FN', None):
            memcache.pickle_memcache.delete(memcache.memoize_key(fn, uri))
1✔
320

321

322
# WARNING: AddRemoveMixin *must* be before StringIdModel here so that its __init__
323
# gets called! Due to an (arguable) ndb.Model bug:
324
# https://github.com/googleapis/python-ndb/issues/1025
325
class User(AddRemoveMixin, StringIdModel, metaclass=ProtocolUserMeta):
1✔
326
    """Abstract base class for a Bridgy Fed user.
327

328
    Stores some protocols' keypairs. Currently:
329

330
    * RSA keypair for ActivityPub HTTP Signatures
331
      properties: ``mod``, ``public_exponent``, ``private_exponent``, all
332
      encoded as base64url (ie URL-safe base64) strings as described in RFC
333
      4648 and section 5.1 of the Magic Signatures spec:
334
      https://tools.ietf.org/html/draft-cavage-http-signatures-12
335
    * *Not* K-256 signing or rotation keys for AT Protocol, those are stored in
336
      :class:`arroba.datastore_storage.AtpRepo` entities
337
    """
338
    GET_ORIGINAL_FN = get_original_user_key
1✔
339
    'used by AddRemoveMixin'
1✔
340

341
    obj_key = ndb.KeyProperty(kind='Object')  # user profile
1✔
342
    ''
1✔
343
    use_instead = ndb.KeyProperty()
1✔
344
    ''
1✔
345

346
    copies = ndb.StructuredProperty(Target, repeated=True)
1✔
347
    """Proxy copies of this user elsewhere, eg DIDs for ATProto records, bech32
1✔
348
    npub Nostr ids, etc. Similar to ``rel-me`` links in microformats2,
349
    ``alsoKnownAs`` in DID docs (and now AS2), etc.
350
    """
351

352
    mod = ndb.StringProperty()
1✔
353
    """Part of the bridged ActivityPub actor's private key."""
1✔
354
    public_exponent = ndb.StringProperty()
1✔
355
    """Part of the bridged ActivityPub actor's private key."""
1✔
356
    private_exponent = ndb.StringProperty()
1✔
357
    """Part of the bridged ActivityPub actor's private key."""
1✔
358
    nostr_key_bytes = EncryptedProperty()
1✔
359
    """The bridged Nostr account's secp256k1 private key, in raw bytes."""
1✔
360

361
    manual_opt_out = ndb.BooleanProperty()
1✔
362
    """Set to True to manually disable this user. Set to False to override spam filters and forcibly enable this user."""
1✔
363

364
    enabled_protocols = ndb.StringProperty(repeated=True,
1✔
365
                                           choices=list(PROTOCOLS.keys()))
366
    """Protocols that this user has explicitly opted into.
1✔
367

368
    Protocols that don't require explicit opt in are omitted here.
369
    """
370

371
    sent_dms = ndb.StructuredProperty(DM, repeated=True)
1✔
372
    """DMs that we've attempted to send to this user."""
1✔
373

374
    send_notifs = ndb.StringProperty(default='all', choices=('all', 'none'))
1✔
375
    """Which notifications we should send this user."""
1✔
376

377
    created = ndb.DateTimeProperty(auto_now_add=True)
1✔
378
    ''
1✔
379
    updated = ndb.DateTimeProperty(auto_now=True)
1✔
380
    ''
1✔
381

382
    # `existing` attr is set by get_or_create
383

384
    # OLD. some stored entities still have these; do not reuse.
385
    # direct = ndb.BooleanProperty(default=False)
386
    # actor_as2 = JsonProperty()
387
    # protocol-specific state
388
    # atproto_notifs_indexed_at = ndb.TextProperty()
389
    # atproto_feed_indexed_at = ndb.TextProperty()
390

391
    def __init__(self, **kwargs):
        """Constructor.

        Pulls ``obj`` out of ``kwargs`` and assigns it through the :attr:`obj`
        property setter after the base constructor has run, because however
        :class:`google.cloud.ndb.model.Model` sets it doesn't work with
        ``@property`` and ``@obj.setter`` below.

        Args:
          kwargs: passed through to the base constructor, minus ``obj``
        """
        profile = kwargs.pop('obj', None)
        super().__init__(**kwargs)

        if profile:
            self.obj = profile
1✔
403

404
    @classmethod
    def new(cls, **kwargs):
        """Not supported here; tries to prevent instantiation.

        Use subclasses instead.

        Raises:
          NotImplementedError: always
        """
        raise NotImplementedError
×
408

409
    def _post_put_hook(self, future):
        """Logs this entity's key at debug level after it's been written."""
        message = f'Wrote {self.key}'
        logger.debug(message)
1✔
411

412
    @classmethod
    def get_by_id(cls, id, allow_opt_out=False, **kwargs):
        """Override to follow ``use_instead`` property and ``status``.

        Args:
          id: key id of the user to load
          allow_opt_out (bool): whether to return the user even if they have
            a :attr:`status`
          kwargs: passed through to ``_get_by_id``

        Returns:
          User or None: None if the user doesn't exist, or if they have a
          status, eg opted out, and ``allow_opt_out`` is False
        """
        found = cls._get_by_id(id, **kwargs)
        if found and found.use_instead:
            logger.info(f'{found.key} use_instead => {found.use_instead}')
            found = found.use_instead.get()

        if found and found.status and not allow_opt_out:
            logger.info(f'{found.key} is {found.status}')
            return None

        return found or None
1✔
431

432
    @classmethod
    def get_or_create(cls, id, propagate=False, allow_opt_out=False,
                      reload=False, **kwargs):
        """Loads and returns a :class:`User`. Creates it if necessary.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for much, last writer wins
        is usually fine.

        Must be called on a subclass, not on :class:`User` itself.

        If no user with this id exists but ``id`` is a copy of another user
        (per :func:`get_original_user_key`), that original user is returned
        instead of creating a new one.

        Sets the ``existing`` attribute on the returned user: True if it was
        loaded by id, False otherwise.

        Args:
          id: key id of the user to load or create
          propagate (bool): whether to create copies of this user in push-based
            protocols, eg ATProto and Nostr.
          allow_opt_out (bool): whether to allow and create the user if they're
            currently opted out
          reload (bool): whether to reload profile always, vs only if necessary
          kwargs: passed through to ``cls`` constructor

        Returns:
          User: existing or new user, or None if the user is opted out
        """
        assert cls != User

        # always load opted out users here; the status check happens below,
        # after the optional profile reload
        user = cls.get_by_id(id, allow_opt_out=True)
        if user:  # existing
            if reload:
                user.reload_profile(gateway=True, raise_=False)

            if user.status and not allow_opt_out:
                return None
            user.existing = True

            # TODO: propagate more fields?
            # only fill in fields that are currently unset; never overwrite
            changed = False
            for field in ['obj', 'obj_key']:
                old_val = getattr(user, field, None)
                new_val = kwargs.get(field)
                if old_val is None and new_val is not None:
                    setattr(user, field, new_val)
                    changed = True

            # merge in, don't replace, newly requested enabled protocols
            if enabled_protocols := kwargs.get('enabled_protocols'):
                user.enabled_protocols = (set(user.enabled_protocols)
                                          | set(enabled_protocols))
                changed = True

            if not propagate:
                # nothing more to do; only write if something changed
                if changed:
                    try:
                        user.put()
                    except AssertionError as e:
                        error(f'Bad {cls.__name__} id {id} : {e}')
                return user

        else:  # new, not existing
            # id may be a copy of a user in another protocol; if so, return
            # that original user instead
            if orig_key := get_original_user_key(id):
                orig = orig_key.get()
                if orig.status and not allow_opt_out:
                    return None
                orig.existing = False
                return orig

            user = cls(id=id, **kwargs)
            user.existing = False
            try:
                user.reload_profile(gateway=True, raise_=False)
            except AssertionError as e:
                error(f'Bad {cls.__name__} id {id} : {e}')

            if user.status and not allow_opt_out:
                return None

        if propagate and user.status in (None, 'private'):
            # the concatenation builds a new list, so removing from
            # user.enabled_protocols inside the loop doesn't disturb iteration
            for label in user.enabled_protocols + list(user.DEFAULT_ENABLED_PROTOCOLS):
                proto = PROTOCOLS[label]
                if proto == cls:
                    continue
                elif proto.HAS_COPIES:
                    if not user.get_copy(proto) and user.is_enabled(proto):
                        try:
                            proto.create_for(user)
                        except (ValueError, AssertionError):
                            # best effort: log and disable the protocol
                            # instead of failing the whole call
                            logger.info(f'failed creating {proto.LABEL} copy',
                                        exc_info=True)
                            user.remove('enabled_protocols', proto.LABEL)
                    else:
                        logger.debug(f'{proto.LABEL} not enabled or user copy already exists, skipping propagate')

        try:
            user.put()
        except AssertionError as e:
            error(f'Bad {cls.__name__} id {id} : {e}')

        logger.debug(('Updated ' if user.existing else 'Created new ') + str(user))
        return user
1✔
526

527
    @property
    def obj(self):
        """Convenience accessor for the :class:`Object` at :attr:`obj_key`.

        Loads it from the datastore on first access and caches it on this
        instance in ``_obj``.

        Returns:
          Object or None: None if :attr:`obj_key` is unset
        """
        if not self.obj_key:
            return None

        if not hasattr(self, '_obj'):
            self._obj = self.obj_key.get()
        return self._obj

    @obj.setter
    def obj(self, obj):
        """Sets the cached object and :attr:`obj_key` together.

        Args:
          obj (Object or None): must have a key if set. A falsy value clears
            both the cached object and :attr:`obj_key`.
        """
        if not obj:
            self._obj = None
            self.obj_key = None
            return

        assert isinstance(obj, Object)
        assert obj.key
        self._obj = obj
        self.obj_key = obj.key
1✔
544

545
    def delete(self, proto=None):
        """Deletes a user's bridged actors in all protocols or a specific one.

        Builds an AS1 ``delete`` activity with this user as both actor and
        object, and hands it to :meth:`deliver`.

        Args:
          proto (Protocol): optional. If omitted, the delete applies to all
            protocols.
        """
        timestamp = util.now().isoformat()
        scope = 'all' if not proto else proto.LABEL
        delete_id = f'{self.profile_id()}#bridgy-fed-delete-user-{scope}-{timestamp}'

        user_id = self.key.id()
        activity = {
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': user_id,
            'object': user_id,
        }
        delete = Object(id=delete_id, source_protocol=self.LABEL,
                        our_as1=activity)
        self.deliver(delete, from_user=self, to_proto=proto)
1✔
562

563
    @classmethod
    def load_multi(cls, users):
        """Loads :attr:`obj` for multiple users in parallel.

        Fetches all of the users' profile objects with a single
        ``ndb.get_multi`` call, then stores each one on its user's ``_obj``.
        Users whose object is missing or unset get ``_obj = None``.

        Args:
          users (sequence of User)
        """
        profile_keys = (user.obj_key for user in users if user.obj_key)

        loaded = {}
        for obj in ndb.get_multi(profile_keys):
            if obj:
                loaded[obj.key] = obj

        for user in users:
            user._obj = loaded.get(user.obj_key)
1✔
575

576
    @ndb.ComputedProperty
    def handle(self):
        """This user's unique, human-chosen handle, eg ``@me@snarfed.org``.

        Abstract here; to be implemented by subclasses.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError
×
583

584
    @ndb.ComputedProperty
    def handle_as_domain(self):
        """This user's handle in domain-like format.

        Computed by passing :attr:`handle` to :func:`ids.handle_as_domain`.

        Returns:
          str or None: if handle is None
        """
        handle = self.handle
        return ids.handle_as_domain(handle)
1✔
592

593
    @ndb.ComputedProperty
    def status(self):
        """Whether this user is blocked or opted out.

        Optional. See :attr:`USER_STATUS_DESCRIPTIONS` for possible values.

        The checks below run in priority order and the first match wins, so
        their order is significant.

        Returns:
          str or None: None if the user has no blocking status
        """
        # manual_opt_out is tri-state: True forces opt-out, False forces
        # enabled (skipping all checks below), None falls through
        if self.manual_opt_out:
            return 'opt-out'
        elif self.manual_opt_out is False:
            return None

        # TODO: require profile for more protocols? all?
        if not self.obj or not self.obj.as1:
            return None

        if self.obj.as1.get('bridgeable') is False:  # FEP-0036
            return 'opt-out'

        if self.REQUIRES_AVATAR and not self.obj.as1.get('image'):
            return 'requires-avatar'

        # a display name that's missing, or that's just the handle or id,
        # doesn't count
        name = self.obj.as1.get('displayName')
        if self.REQUIRES_NAME and (not name or name in (self.handle, self.key.id())):
            return 'requires-name'

        # profiles with no published timestamp pass this check
        if self.REQUIRES_OLD_ACCOUNT:
            if published := self.obj.as1.get('published'):
                if util.now() - util.parse_iso8601(published) < OLD_ACCOUNT_AGE:
                    return 'requires-old-account'

        # https://swicg.github.io/miscellany/#movedTo
        # https://docs.joinmastodon.org/spec/activitypub/#as
        if self.obj.as1.get('movedTo'):
            return 'moved'

        # plain text versions of the profile bio and name, for hashtag checks.
        # note that this rebinds name, which was the raw displayName above
        summary = html_to_text(self.obj.as1.get('summary', ''), ignore_links=True)
        name = html_to_text(self.obj.as1.get('displayName', ''), ignore_links=True)

        # #nobridge overrides enabled_protocols
        if '#nobridge' in summary or '#nobridge' in name:
            return 'nobridge'

        # user has explicitly opted in. should go after spam filter (REQUIRES_*)
        # checks, but before is_public and #nobot
        #
        # !!! WARNING: keep in sync with User.enable_protocol!
        if self.enabled_protocols:
            return None

        if not as1.is_public(self.obj.as1, unlisted=False):
            return 'private'

        # enabled_protocols overrides #nobot
        if '#nobot' in summary or '#nobot' in name:
            return 'nobot'
1✔
648

649
    def is_enabled(self, to_proto, explicit=False):
        """Returns True if this user can be bridged to a given protocol.

        Reasons this might return False:

        * We haven't turned on bridging these two protocols yet.
        * The user is opted out or blocked.
        * The user is on a domain that's opted out or blocked.
        * The from protocol requires opt in, and the user hasn't opted in.
        * ``explicit`` is True, and this protocol supports ``to_proto`` by
          default, but the user hasn't explicitly opted into it.

        Args:
          to_proto (Protocol subclass)
          explicit (bool)

        Returns:
          bool:
        """
        from protocol import Protocol
        assert isinstance(to_proto, Protocol) or issubclass(to_proto, Protocol)

        # a user is always enabled for their own protocol
        if self.__class__ == to_proto:
            return True

        to_label = to_proto.LABEL

        # users on a Bridgy Fed protocol subdomain are enabled for every
        # protocol except that subdomain's own
        if bot_protocol := Protocol.for_bridgy_subdomain(self.key.id()):
            return to_proto != bot_protocol

        if self.manual_opt_out:
            return False

        # explicit opt in is checked before status, so it wins over it
        if to_label in self.enabled_protocols:
            return True

        if self.status:
            return False

        return to_label in self.DEFAULT_ENABLED_PROTOCOLS and not explicit
1✔
691

692
    def enable_protocol(self, to_proto):
        """Adds ``to_proto`` to :attr:`enabled_protocols`.

        Also sends a welcome DM to the user (via a send task) if their protocol
        supports DMs.

        If the user has a blocking :attr:`status` other than ``nobot`` or
        ``private``, or their handle can't be translated to ``to_proto``, they
        get a DM explaining why and ``common.error`` is called with status
        299 instead.

        Args:
          to_proto (:class:`protocol.Protocol` subclass)
        """
        import dms

        # explicit opt-in overrides some status
        # !!! WARNING: keep in sync with User.status!
        ineligible = """Hi! Your account isn't eligible for bridging yet because your {desc}. <a href="https://fed.brid.gy/docs#troubleshooting">More details here.</a> You can try again once that's fixed by unfollowing and re-following this account."""
        if self.status and self.status not in ('nobot', 'private'):
            # only DM the user if we have a description for this status
            if desc := USER_STATUS_DESCRIPTIONS.get(self.status):
                dms.maybe_send(from_=to_proto, to_user=self, type=self.status,
                               text=ineligible.format(desc=desc))
            # NOTE(review): presumably common.error raises and so ends this
            # method here; confirm
            common.error(f'Nope, user {self.key.id()} is {self.status}', status=299)

        # called only to check that the handle can be translated to to_proto;
        # the return value is discarded
        try:
            self.handle_as(to_proto)
        except ValueError as e:
            dms.maybe_send(from_=to_proto, to_user=self,
                           type=f'unsupported-handle-{to_proto.ABBREV}',
                           text=ineligible.format(desc=e))
            common.error(str(e), status=299)

        if to_proto.LABEL in ids.COPIES_PROTOCOLS:
            # do this even if there's an existing copy since we might need to
            # reactivate it, which create_for should do
            to_proto.create_for(self)

        # only store and send the welcome DM the first time this protocol is
        # enabled
        if to_proto.LABEL not in self.enabled_protocols:
            self.enabled_protocols.append(to_proto.LABEL)
            dms.maybe_send(from_=to_proto, to_user=self, type='welcome', text=f"""Welcome to Bridgy Fed! Your account will soon be bridged to {to_proto.PHRASE} at {self.user_link(proto=to_proto, name=False)}. <a href="https://fed.brid.gy/docs">See the docs</a> and <a href="https://{common.PRIMARY_DOMAIN}{self.user_page_path()}">your user page</a> for more information. To disable this and delete your bridged profile, block this account.""")
            self.put()

        msg = f'Enabled {to_proto.LABEL} for {self.key.id()} : {self.user_page_path()}'
        logger.info(msg)
1✔
732

733
    def disable_protocol(self, to_proto):
        """Removes ``to_proto`` from :attr:`enabled_protocols`.

        Stores this user afterward and logs the change.

        Args:
          to_proto (:class:`protocol.Protocol` subclass)
        """
        label = to_proto.LABEL
        self.remove('enabled_protocols', label)
        self.put()
        logger.info(f'Disabled {label} for {self.key.id()} : {self.user_page_path()}')
1✔
743

744
    def handle_as(self, to_proto, short=False):
        """Returns this user's handle in a different protocol.

        Args:
          to_proto (str or Protocol): a string is looked up in ``PROTOCOLS``
          short (bool): whether to return the full handle or a shortened form.
            Default False. Currently only affects ActivityPub; returns just
            ``@[user]`` instead of ``@[user]@[domain]``

        Returns:
          str or None: None if this user has no handle to translate
        """
        if isinstance(to_proto, str):
            to_proto = PROTOCOLS[to_proto]

        # override to-ATProto to use custom domain handle in DID doc
        from atproto import ATProto, did_to_handle
        if to_proto == ATProto:
            did = self.get_copy(ATProto)
            if did:
                atproto_handle = did_to_handle(did, remote=False)
                if atproto_handle:
                    return atproto_handle

        # override web users to always use domain instead of custom username
        # TODO: fall back to id if handle is unset?
        if self.LABEL == 'web':
            handle = self.key.id()
        else:
            handle = self.handle

        if not handle:
            return None

        return ids.translate_handle(handle=handle, from_=self.__class__,
                                    to=to_proto, short=short)
774

775
    def id_as(self, to_proto):
        """Returns this user's id in a different protocol.

        Translates this user's key id via :func:`ids.translate_user_id`.

        Args:
          to_proto (str or Protocol): a string is looked up in ``PROTOCOLS``

        Returns:
          str
        """
        target = PROTOCOLS[to_proto] if isinstance(to_proto, str) else to_proto
        return ids.translate_user_id(id=self.key.id(), from_=self.__class__,
                                     to=target)
789

790
    def handle_or_id(self):
        """Returns this user's handle if it's set, otherwise their key id."""
        handle = self.handle
        if handle:
            return handle
        return self.key.id()
793

794
    def public_pem(self):
        """Returns the user's PEM-encoded ActivityPub public RSA key.

        Generates and stores the keypair first if the user doesn't have one.

        Returns:
          bytes:
        """
        self._maybe_generate_ap_key()
        modulus = base64_to_long(str(self.mod))
        exponent = base64_to_long(str(self.public_exponent))
        public_key = RSA.construct((modulus, exponent))
        return public_key.exportKey(format='PEM')
804

805
    def private_pem(self):
        """Returns the user's PEM-encoded ActivityPub private RSA key.

        Generates and stores the keypair first if the user doesn't have one.

        Returns:
          bytes:
        """
        self._maybe_generate_ap_key()
        components = tuple(
            base64_to_long(str(part))
            for part in (self.mod, self.public_exponent, self.private_exponent))
        private_key = RSA.construct(components)
        return private_key.exportKey(format='PEM')
816

817
    def _maybe_generate_ap_key(self):
        """Generates and stores this user's ActivityPub RSA keypair if missing.

        No-op if :attr:`mod`, :attr:`public_exponent`, and
        :attr:`private_exponent` are all already set. Otherwise populates all
        three and stores this user.

        Raises:
          AssertionError: if only some of the three properties are set
        """
        if self.public_exponent and self.private_exponent and self.mod:
            return

        logger.info(f'generating AP keypair for {self.key}')
        # refuse to overwrite a partially populated keypair. the failure
        # message is this user's key, so that the entity can be identified.
        assert (not self.public_exponent and not self.private_exponent
                and not self.mod), self.key
        # under DEBUG, draw key material from the random module; otherwise
        # leave randfunc unset so RSA.generate uses its default source
        key = RSA.generate(KEY_BITS, randfunc=random.randbytes if DEBUG else None)
        self.mod = long_to_base64(key.n)
        self.public_exponent = long_to_base64(key.e)
        self.private_exponent = long_to_base64(key.d)
        self.put()
828

829
    def nsec(self):
        """Returns the user's Nostr private secp256k1 key, bech32-encoded.

        Uses the ``nsec`` prefix. Generates and stores the key first if the
        user doesn't have one.

        Returns:
          str:
        """
        self._maybe_generate_nostr_key()
        serialized = secp256k1.PrivateKey(self.nostr_key_bytes, raw=True).serialize()
        return granary.nostr.bech32_encode('nsec', serialized)
838

839
    def hex_pubkey(self):
        """Returns the user's hex-encoded Nostr public secp256k1 key.

        Derived from the stored private key, which is generated and stored
        first if the user doesn't have one.

        Returns:
          str:
        """
        self._maybe_generate_nostr_key()
        privkey_hex = self.nostr_key_bytes.hex()
        return granary.nostr.pubkey_from_privkey(privkey_hex)
847

848
    def npub(self):
        """Returns the user's Nostr public secp256k1 key, bech32-encoded.

        Uses the ``npub`` prefix on the value from :meth:`hex_pubkey`.

        Returns:
          str:
        """
        pubkey = self.hex_pubkey()
        return granary.nostr.bech32_encode('npub', pubkey)
855

856
    def _maybe_generate_nostr_key(self):
        """Generates and stores this user's Nostr private key if it's unset."""
        if self.nostr_key_bytes:
            return

        logger.info(f'generating Nostr keypair for {self.key}')
        self.nostr_key_bytes = secp256k1.PrivateKey().private_key
        self.put()
862

863
    def name(self):
        """Returns this user's human-readable name, eg ``Ryan Barrett``.

        Uses the profile object's AS1 ``displayName`` if there is one,
        otherwise falls back to :meth:`handle_or_id`.
        """
        if self.obj:
            profile = self.obj.as1
            if profile:
                display_name = profile.get('displayName')
                if display_name:
                    return display_name

        return self.handle_or_id()
871

872
    def web_url(self):
        """Returns this user's web URL (homepage), eg ``https://foo.com/``.

        Abstract here; subclasses provide the implementation.

        Returns:
          str

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
881

882
    def is_web_url(self, url, ignore_www=False):
        """Returns True if the given URL is this user's web URL (homepage).

        Trailing slashes, a leading ``www.`` after the scheme, and http vs
        https are all ignored. A bare domain matches if it's equal to the web
        URL's host.

        Args:
          url (str)
          ignore_www (bool): accepted, but not currently read by this method;
            ``www.`` is stripped from both URLs regardless

        Returns:
          bool:
        """
        if not url:
            return False

        def normalize(val):
            """Drops trailing slashes and a leading www. after the scheme."""
            return re.sub(r'^(https?://)www\.', r'\1', val.rstrip('/'))

        candidate = normalize(url.strip())
        candidate_parts = urlparse(candidate)
        if candidate_parts.scheme not in ('http', 'https', ''):
            return False

        mine = normalize(self.web_url())
        mine_parts = urlparse(mine)

        if candidate == mine or candidate == mine_parts.netloc:
            return True

        # compare everything except the scheme, ie ignore http vs https
        return candidate_parts[1:] == mine_parts[1:]
907

908
    def id_uri(self):
        """Returns the user id as a URI.

        This base implementation returns the key id unchanged, which fits
        protocols whose ids are already URIs, eg ActivityPub actor ids. Other
        forms, eg at://did:plc:... for ATProto users or https://site.com for
        Web users, differ from the key id.

        Returns:
          str
        """
        user_id = self.key.id()
        return user_id
919

920
    def profile_id(self):
        """Returns the id of this user's profile object in its native protocol.

        Delegates to :func:`ids.profile_id` with this user's key id.

        Examples:

        * Web: home page URL, eg ``https://me.com/``
        * ActivityPub: actor URL, eg ``https://instance.com/users/me``
        * ATProto: profile AT URI, eg ``at://did:plc:123/app.bsky.actor.profile/self``

        Returns:
          str or None:
        """
        user_id = self.key.id()
        return ids.profile_id(id=user_id, proto=self)
935

936
    def is_profile(self, obj):
        """Returns True if ``obj`` is this user's profile/actor, False otherwise.

        ``obj`` matches if its key id is this user's key id, this user's
        :meth:`profile_id`, or the id of :attr:`obj_key`.

        Args:
          obj (Object)

        Returns:
          bool:
        """
        obj_id = obj.key.id()
        if obj_id in (self.key.id(), self.profile_id()):
            return True

        if self.obj_key and obj_id == self.obj_key.id():
            return True

        # explicit, so that callers get the documented bool instead of None
        return False
950

951
    def reload_profile(self, **kwargs):
        """Reloads this user's identity and profile from their native protocol.

        If the load returns an object, stores it in ``self.obj``; otherwise
        leaves the existing ``self.obj`` alone. Stores this user either way.

        Args:
          kwargs: passed through to :meth:`Protocol.load`
        """
        if reloaded := self.load(self.profile_id(), remote=True, **kwargs):
            self.obj = reloaded

        # write the user so that we re-populate any computed properties
        self.put()
965

966
    def user_page_path(self, rest=None, prefer_id=False):
        """Returns the user's Bridgy Fed user page path.

        The path is ``/[ABBREV]/[handle or id]``, plus ``rest`` if provided.

        Args:
          rest (str): additional path and/or query to add to the end. A ``/``
            separator is inserted unless it already starts with ``/`` or ``?``
          prefer_id (bool): whether to prefer to use the account's id in the path
            instead of handle. Defaults to ``False``.

        Returns:
          str
        """
        ident = self.key.id() if prefer_id else self.handle_or_id()
        base = f'/{self.ABBREV}/{ident}'
        if not rest:
            return base

        separator = '' if rest.startswith(('?', '/')) else '/'
        return f'{base}{separator}{rest}'
982

983
    def get_copy(self, proto):
        """Returns the id for the copy of this user in a given protocol.

        If ``proto`` is this user's own protocol, returns this user's key id.
        Otherwise returns the URI of the first entry in :attr:`copies` whose
        protocol is ``proto``'s label or abbreviation.

        Args:
          proto: :class:`Protocol` subclass

        Returns:
          str: or None if no such copy exists
        """
        # don't use isinstance because the testutil Fake protocol has subclasses
        if self.LABEL == proto.LABEL:
            return self.key.id()

        wanted = (proto.LABEL, proto.ABBREV)
        return next((target.uri for target in self.copies
                     if target.protocol in wanted), None)
1002

1003
    def user_link(self, name=True, handle=True, pictures=False, logo=None,
                  proto=None, proto_fallback=False):
        """Returns a pretty HTML link to the user's profile.

        Can optionally include display name, handle, profile
        picture, and/or link to a different protocol that they've enabled.

        The display name, handle, and URLs come from user profiles, so they're
        HTML-escaped before they're interpolated into the markup.

        TODO: unify with :meth:`Object.actor_link`?

        Args:
          name (bool): include display name
          handle (bool): True to include handle, False to exclude it, ``'short'``
            to include a shortened version, if available
          pictures (bool): include profile picture and protocol logo
          logo (str): optional path to platform logo to show instead of the
            protocol's default
          proto (protocol.Protocol): link to this protocol instead of the user's
            native protocol
          proto_fallback (bool): if True, and ``proto`` is provided and has no
            canonical profile URL for bridged users, uses the user's profile
            URL in their native protocol

        Returns:
          str: HTML

        Raises:
          AssertionError: if ``proto`` is provided but isn't enabled for this
            user
        """
        # function-scoped so that this method brings its own dependency
        from html import escape

        img = name_str = full_handle = handle_str = dot = logo_html = a_open = a_close = ''

        if proto:
            assert self.is_enabled(proto), f"{proto.LABEL} isn't enabled"
            url = proto.bridged_web_url_for(self, fallback=proto_fallback)
        else:
            proto = self.__class__
            url = self.web_url()

        if pictures:
            if logo:
                logo_html = f'<img class="logo" src="{escape(logo)}" /> '
            else:
                # LOGO_HTML is markup by design, so it's deliberately not escaped
                logo_html = f'<span class="logo" title="{proto.__name__}">{proto.LOGO_HTML or proto.LOGO_EMOJI}</span> '
            if pic := self.profile_picture():
                img = f'<img src="{escape(pic)}" class="profile"> '

        if handle:
            full_handle = self.handle_as(proto) or ''
            handle_str = self.handle_as(proto, short=(handle == 'short')) or ''

        if name:
            # skip the name if it would just repeat the handle
            display_name = self.name()
            if display_name != full_handle:
                name_str = display_name or ''

        if handle_str and name_str:
            dot = ' &middot; '

        if url:
            # dot is already an HTML entity, so only escape the text around it
            title = f'{escape(name_str)}{dot}{escape(full_handle)}'
            a_open = f'<a class="h-card u-author mention" rel="me" href="{escape(url)}" title="{title}">'
            a_close = '</a>'

        # escape after truncating so that we never cut an entity in half
        name_html = ''
        if name_str:
            name_html = f'<span style="unicode-bidi: isolate">{escape(ellipsize(name_str, chars=40))}</span>'
        handle_html = escape(ellipsize(handle_str, chars=40))
        return f'{logo_html}{a_open}{img}{name_html}{dot}{handle_html}{a_close}'
1058

1059
    def profile_picture(self):
        """Returns the user's profile picture image URL, if available, or None.

        Read from the ``image`` field of the profile object's AS1.
        """
        if not self.obj:
            return None

        profile = self.obj.as1
        if not profile:
            return None

        return util.get_url(profile, 'image')
1063

1064
    # can't use functools.lru_cache here because we want the cache key to be
1065
    # just the user id, not the whole entity
1066
    @cachetools.cached(
        cachetools.TTLCache(50000, FOLLOWERS_CACHE_EXPIRATION.total_seconds()),
        key=lambda user: user.key.id(), lock=Lock())
    @memcache.memoize(key=lambda self: self.key.id(),
                      expire=FOLLOWERS_CACHE_EXPIRATION)
    def count_followers(self):
        """Counts this user's active followers and followings.

        Results are cached per user id, both in process and via
        ``memcache.memoize``, for ``FOLLOWERS_CACHE_EXPIRATION``.

        Returns:
          (int, int) tuple: (number of followers, number following)
        """
        if self.key.id() in PROTOCOL_DOMAINS:
            # we don't store Followers for protocol bot users any more, so
            # follower counts are inaccurate, so don't return them
            return (0, 0)

        # start both count queries before waiting on either result
        followers_future = Follower.query(
            Follower.to == self.key, Follower.status == 'active').count_async()
        following_future = Follower.query(
            Follower.from_ == self.key, Follower.status == 'active').count_async()

        return followers_future.get_result(), following_future.get_result()
1089

1090

1091
# WARNING: AddRemoveMixin *must* be before StringIdModel here so that its __init__
1092
# gets called! Due to an (arguable) ndb.Model bug:
1093
# https://github.com/googleapis/python-ndb/issues/1025
1094
class Object(AddRemoveMixin, StringIdModel):
1✔
1095
    """An activity or other object, eg actor.
1096

1097
    Key name is the id, generally a URI. We synthesize ids if necessary.
1098
    """
1099
    GET_ORIGINAL_FN = get_original_object_key
1✔
1100
    'used by AddRemoveMixin'
1✔
1101

1102
    users = ndb.KeyProperty(repeated=True)
1✔
1103
    'User(s) who created or otherwise own this object.'
1✔
1104

1105
    notify = ndb.KeyProperty(repeated=True)
1✔
1106
    """User who should see this in their user page, eg in reply to, reaction to,
1✔
1107
    share of, etc.
1108
    """
1109
    feed = ndb.KeyProperty(repeated=True)
1✔
1110
    'User who should see this in their feeds, eg followers of its creator'
1✔
1111

1112
    source_protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()))
1✔
1113
    """The protocol this object originally came from.
1✔
1114

1115
    TODO: nail down whether this is :attr:`ABBREV`` or :attr:`LABEL`
1116
    """
1117

1118
    # TODO: switch back to ndb.JsonProperty if/when they fix it for the web console
1119
    # https://github.com/googleapis/python-ndb/issues/874
1120
    as2 = JsonProperty()
1✔
1121
    'ActivityStreams 2, for ActivityPub'
1✔
1122
    bsky = JsonProperty()
1✔
1123
    'AT Protocol lexicon, for Bluesky'
1✔
1124
    mf2 = JsonProperty()
1✔
1125
    'HTML microformats2 item (*not* top level parse object with ``items`` field)'
1✔
1126
    nostr = JsonProperty()
1✔
1127
    'Nostr event'
1✔
1128
    our_as1 = JsonProperty()
1✔
1129
    'ActivityStreams 1, for activities that we generate or modify ourselves'
1✔
1130
    raw = JsonProperty()
1✔
1131
    'Other standalone data format, eg DID document'
1✔
1132

1133
    extra_as1 = JsonProperty()
1✔
1134
    "Additional individual fields to merge into this object's AS1 representation"
1✔
1135

1136
    # TODO: remove and actually delete Objects instead!
1137
    deleted = ndb.BooleanProperty()
1✔
1138
    ''
1✔
1139

1140
    copies = ndb.StructuredProperty(Target, repeated=True)
1✔
1141
    """Copies of this object elsewhere, eg at:// URIs for ATProto records and
1✔
1142
    nevent etc bech32-encoded Nostr ids, where this object is the original.
1143
    Similar to u-syndication links in microformats2 and
1144
    upstream/downstreamDuplicates in AS1.
1145
    """
1146

1147
    created = ndb.DateTimeProperty(auto_now_add=True)
1✔
1148
    ''
1✔
1149
    updated = ndb.DateTimeProperty(auto_now=True)
1✔
1150
    ''
1✔
1151

1152
    new = None
1✔
1153
    """True if this object is new, ie this is the first time we've seen it,
1✔
1154
    False otherwise, None if we don't know.
1155
    """
1156
    changed = None
1✔
1157
    """True if this object's contents have changed from our existing copy in the
1✔
1158
    datastore, False otherwise, None if we don't know. :class:`Object` is
1159
    new/changed. See :meth:`activity_changed()` for more details.
1160
    """
1161

1162
    # DEPRECATED
1163
    # These were for full feeds with multiple items, not just this one, so they were
1164
    # stored as audit records only, not used in to_as1. for Atom/RSS
1165
    # based Objects, our_as1 was populated with an feed_index top-level
1166
    # integer field that indexed into one of these.
1167
    #
1168
    # atom = ndb.TextProperty() # Atom XML
1169
    # rss = ndb.TextProperty()  # RSS XML
1170

1171
    # DEPRECATED; these were for delivery tracking, but they were too expensive,
1172
    # so we stopped: https://github.com/snarfed/bridgy-fed/issues/1501
1173
    #
1174
    # STATUSES = ('new', 'in progress', 'complete', 'failed', 'ignored')
1175
    # status = ndb.StringProperty(choices=STATUSES)
1176
    # delivered = ndb.StructuredProperty(Target, repeated=True)
1177
    # undelivered = ndb.StructuredProperty(Target, repeated=True)
1178
    # failed = ndb.StructuredProperty(Target, repeated=True)
1179

1180
    # DEPRECATED but still used read only to maintain backward compatibility
1181
    # with old Objects in the datastore that we haven't bothered migrating.
1182
    #
1183
    # domains = ndb.StringProperty(repeated=True)
1184

1185
    # DEPRECATED; replaced by :attr:`users`, :attr:`notify`, :attr:`feed`
1186
    #
1187
    # labels = ndb.StringProperty(repeated=True,
1188
    #                             choices=('activity', 'feed', 'notification', 'user'))
1189

1190
    @property
    def as1(self):
        """This object's ActivityStreams 1 representation.

        Converted from the first populated data property, checked in this
        order: :attr:`our_as1`, :attr:`as2`, :attr:`bsky`, :attr:`mf2`,
        :attr:`nostr`. Afterward, fills in a missing ``id`` from the key,
        prefixes image URLs with ``IMAGE_PROXY_URL_BASE`` for ids on
        ``IMAGE_PROXY_DOMAINS``, and merges in :attr:`extra_as1`.

        Note that :attr:`our_as1` is used directly, not copied, so for those
        objects these steps modify the stored dict itself.

        Returns:
          dict: AS1 object, or None if no data property is populated or if
          the :attr:`bsky` record couldn't be converted
        """
        def use_urls_as_ids(obj):
            """If id field is missing or not a URL, use the url field.

            Recurses into the ``author``, ``actor``, and ``object`` fields.
            """
            obj_id = obj.get('id')
            id_is_usable = obj_id and (util.is_web(obj_id)
                                       or re.match(DOMAIN_RE, obj_id))
            if not id_is_usable:
                if url := util.get_url(obj):
                    obj['id'] = url

            for field in ('author', 'actor', 'object'):
                inner = as1.get_object(obj, field)
                if inner:
                    use_urls_as_ids(inner)

        if self.our_as1:
            obj = self.our_as1
            if self.source_protocol == 'web':
                use_urls_as_ids(obj)

        elif self.as2:
            obj = as2.to_as1(unwrap(self.as2))

        elif self.bsky:
            owner, _, _ = parse_at_uri(self.key.id())
            ATProto = PROTOCOLS['atproto']
            handle = ATProto(id=owner).handle
            try:
                obj = bluesky.to_as1(self.bsky, repo_did=owner, repo_handle=handle,
                                     uri=self.key.id(), pds=ATProto.pds_for(self))
            except (ValueError, RequestException):
                logger.info("Couldn't convert to ATProto", exc_info=True)
                return None

        elif self.mf2:
            obj = microformats2.json_to_object(self.mf2,
                                               rel_urls=self.mf2.get('rel-urls'))
            use_urls_as_ids(obj)

            # use fetched final URL as id, not u-url
            # https://github.com/snarfed/bridgy-fed/issues/829
            if url := self.mf2.get('url'):
                if self.key and '#' in self.key.id():
                    obj['id'] = self.key.id()
                else:
                    obj['id'] = url

        elif self.nostr:
            obj = granary.nostr.to_as1(self.nostr)

        else:
            return None

        # populate id if necessary
        if self.key:
            obj.setdefault('id', self.key.id())

        if util.domain_or_parent_in(obj.get('id'), IMAGE_PROXY_DOMAINS):
            as1.prefix_urls(obj, 'image', IMAGE_PROXY_URL_BASE)

        if self.extra_as1:
            obj.update(self.extra_as1)

        return obj
1250

1251
    @ndb.ComputedProperty
    def type(self):  # AS1 objectType, or verb if it's an activity
        """Returns the type of this object's AS1 form, or None if it has none."""
        # as1 is a plain property that's rebuilt on every access, so read it
        # once here instead of once for the check and again for the call
        obj_as1 = self.as1
        if obj_as1:
            return as1.object_type(obj_as1)

        return None
1255

1256
    def _expire(self):
        """Computes when this object should be automatically deleted.

        Used with a datastore TTL policy:
        https://cloud.google.com/datastore/docs/ttl

        They recommend not indexing TTL properties:
        https://cloud.google.com/datastore/docs/ttl#ttl_properties_and_indexes

        Returns:
          datetime: one day after :attr:`updated` (or now, if that's unset)
          for deleted objects, ``OBJECT_EXPIRE_AGE`` after it for other
          objects whose type isn't in ``DONT_EXPIRE_OBJECT_TYPES``, None for
          the rest
        """
        base = self.updated or util.now()

        if self.deleted:
            return base + timedelta(days=1)

        if self.type not in DONT_EXPIRE_OBJECT_TYPES:
            return base + OBJECT_EXPIRE_AGE

        return None
1269

1270
    expire = ndb.ComputedProperty(_expire, indexed=False)
1✔
1271

1272
    def _pre_put_hook(self):
        """Cleans up and validates this object before it's stored.

        * Strip @context from as2 (we don't do LD) to save disk space, both at
          the top level and in inner actor, attributedTo, author, and object
          dicts
        * Validate that the key id and each copy's id are owned by their
          protocol
        * Validate that Nostr ids are nostr:[hex] ids
        * Validate that at:// URIs have DIDs

        Raises:
          AssertionError: if an id doesn't match its protocol
          ValueError: if an at:// URI's repo isn't a DID
        """
        if self.as2:
            self.as2.pop('@context', None)
            for field in ('actor', 'attributedTo', 'author', 'object'):
                inner_dicts = (val for val in util.get_list(self.as2, field)
                               if isinstance(val, dict))
                for inner in inner_dicts:
                    inner.pop('@context', None)

        def check_id(uri, label):
            """Checks that ``uri`` is a valid id for the ``label`` protocol."""
            if label is None or label == 'ui':
                return

            owner = PROTOCOLS[label]
            assert owner.owns_id(uri) is not False, \
                f'Protocol {owner.LABEL} does not own id {uri}'

            if label == 'nostr':
                assert uri.startswith('nostr:'), uri
                assert granary.nostr.ID_RE.match(uri.removeprefix('nostr:')), uri
                return

            if label != 'atproto':
                return

            assert uri.startswith(('at://', 'did:')), uri
            if uri.startswith('at://'):
                repo, _, _ = parse_at_uri(uri)
                if not repo.startswith('did:'):
                    # TODO: if we hit this, that means the AppView gave us an AT
                    # URI with a handle repo/authority instead of DID. that's
                    # surprising! ...if so, and if we need to handle it, add a
                    # new arroba.did.canonicalize_at_uri() function, then use it
                    # here, or before.
                    raise ValueError(f'at:// URI ids must have DID repos; got {uri}')

        check_id(self.key.id(), self.source_protocol)
        for target in self.copies:
            check_id(target.uri, target.protocol)
1312

1313
    def _post_put_hook(self, future):
        """Logs this object's key at debug level. ``future`` is unused."""
        # TODO: assert that as1 id is same as key id? in pre put hook?
        message = f'Wrote {self.key}'
        logger.debug(message)
1316

1317
    @classmethod
    def get_by_id(cls, id, authed_as=None, **kwargs):
        """Fetches the :class:`Object` with the given id, if it exists.

        The id is passed through ``maybe_truncate_key_id`` before lookup.

        Args:
          id (str)
          authed_as (str): optional; if provided, and a matching :class:`Object`
            with AS1 content already exists, this actor id must match the
            object's ``author``, its ``actor``, or ``id`` itself. Implements
            basic authorization for updates and deletes.
          kwargs: passed through to the superclass's ``get_by_id``

        Returns:
          Object: or None if it doesn't exist

        Raises:
          :class:`werkzeug.exceptions.Forbidden` if ``authed_as`` doesn't match
            the existing object
        """
        obj = super().get_by_id(maybe_truncate_key_id(id), **kwargs)
        if not (obj and obj.as1 and authed_as):
            return obj

        # authorization: check that the authed user is allowed to modify
        # this object
        # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
        proto = PROTOCOLS.get(obj.source_protocol)
        assert proto, obj.source_protocol

        candidate_ids = (as1.get_ids(obj.as1, 'author')
                         + as1.get_ids(obj.as1, 'actor')
                         + [id])
        owners = [ids.normalize_user_id(id=owner, proto=proto)
                  for owner in candidate_ids]

        authed_is_owner = (
            ids.normalize_user_id(id=authed_as, proto=proto) in owners
            or ids.profile_id(id=authed_as, proto=proto) in owners)
        if not authed_is_owner:
            report_error("Auth: Object: authed_as doesn't match owner",
                         user=f'{id} authed_as {authed_as} owners {owners}')
            error(f"authed user {authed_as} isn't object owner {owners}",
                  status=403)

        return obj
1354

1355
    @classmethod
    def get_or_create(cls, id, authed_as=None, **props):
        """Returns an :class:`Object` with the given property values.

        If a matching :class:`Object` doesn't exist in the datastore, creates
        and stores it with ``props``. Otherwise updates the existing one:
        ``copies``, ``feed``, ``notify``, and ``users`` values are merged in,
        and other non-None values that differ from the stored ones replace
        them. Replacing a data property (``as2``, ``bsky``, ``mf2``, ``nostr``,
        ``raw``) without also passing a truthy ``our_as1`` clears
        :attr:`our_as1`. The object is only stored again if something changed.
        Also populates the :attr:`new` and :attr:`changed` properties.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for much, last writer wins
        is usually fine.

        Args:
          id (str)
          authed_as (str): optional; if provided, and a matching :class:`Object`
            already exists, its ``author`` or ``actor`` must contain this actor
            id. Implements basic authorization for updates and deletes.
          props: property values

        Returns:
          Object:

        Raises:
          :class:`werkzeug.exceptions.Forbidden` if ``authed_as`` doesn't match
            the existing object
        """
        key_id = maybe_truncate_key_id(id)
        existing = cls.get_by_id(key_id, authed_as=authed_as)

        if not existing:
            created = Object(id=key_id, **props)
            created.new = True
            created.changed = False
            created.put()
            return created

        # get_by_id() checks authorization if authed_as is set. make sure
        # it's always set for existing objects.
        orig_as1 = existing.as1
        if orig_as1:
            assert authed_as

        repeated_props = ('copies', 'feed', 'notify', 'users')
        data_props = ('as2', 'bsky', 'mf2', 'nostr', 'raw')

        dirty = False
        for prop, val in props.items():
            assert not isinstance(getattr(Object, prop), ndb.ComputedProperty)
            if prop in repeated_props:
                # merge repeated fields. add every element before checking
                # whether any of them were new
                added = [existing.add(prop, elem) for elem in val]
                if any(added):
                    dirty = True
            elif val is not None and val != getattr(existing, prop):
                setattr(existing, prop, val)
                if prop in data_props and not props.get('our_as1'):
                    existing.our_as1 = None
                dirty = True

        existing.new = False
        existing.changed = existing.activity_changed(orig_as1)
        if dirty:
            existing.put()

        return existing
1414

1415
    @staticmethod
    def from_request():
        """Creates and returns an :class:`Object` from form-encoded JSON parameters.

        If the new object has no key but its AS1 has an ``id``, that id becomes
        its key. The new object is not stored.

        Parameters:
          obj_id (str): id of :class:`models.Object` to handle
          *: If ``obj_id`` is unset, all other parameters are properties for a
            new :class:`models.Object` to handle
        """
        form = request.form

        obj_id = form.get('obj_id')
        if obj_id:
            return Object.get_by_id(obj_id)

        props = {field: form.get(field) for field in ('id', 'source_protocol')}
        props.update({
            json_prop: json_loads(encoded)
            for json_prop in ('as2', 'bsky', 'mf2', 'our_as1', 'nostr', 'raw')
            if (encoded := form.get(json_prop))
        })

        obj = Object(**props)
        if not obj.key and obj.as1:
            as1_id = obj.as1.get('id')
            if as1_id:
                obj.key = ndb.Key(Object, as1_id)

        return obj
1440

1441
    def to_request(self):
        """Returns a query parameter dict representing this :class:`Object`.

        The inverse of :meth:`from_request`: populated JSON data properties are
        serialized to JSON strings, and ``source_protocol`` and the key id (as
        ``id``) are included if set.

        Returns:
          dict: maps str parameter name to str value
        """
        form = {}

        # keep in sync with the JSON properties that from_request() reads.
        # nostr was missing here, so Nostr objects lost their data in transit.
        for json_prop in 'as2', 'bsky', 'mf2', 'our_as1', 'nostr', 'raw':
            if val := getattr(self, json_prop, None):
                form[json_prop] = json_dumps(val, sort_keys=True)

        if self.source_protocol:
            form['source_protocol'] = self.source_protocol

        if self.key:
            form['id'] = self.key.id()

        return form
1457

1458
    def activity_changed(self, other_as1):
        """Returns True if this activity is meaningfully changed from ``other_as1``.

        ...otherwise False. If either side is empty, they count as changed
        when exactly one of them is.

        Used to populate :attr:`changed`.

        Args:
          other_as1 (dict): AS1 object, or none
        """
        mine = self.as1
        if mine and other_as1:
            # ignore inReplyTo since we translate it between protocols
            return as1.activity_changed(mine, other_as1, inReplyTo=False)

        return bool(mine) != bool(other_as1)
1472

1473
    def actor_link(self, image=True, sized=False, user=None):
        """Returns a pretty HTML link with the actor's name and picture.

        If ``user`` is one of this object's :attr:`users`, returns their
        :meth:`User.user_link` instead. Otherwise uses this object's AS1
        ``actor`` or ``author``, loaded from the datastore if only its id is
        present. The actor's name and URLs are HTML-escaped before they're
        interpolated into the markup.

        TODO: unify with :meth:`User.user_link`?

        Args:
          image (bool): whether to include an ``img`` tag with the actor's picture
          sized (bool): whether to set an explicit (``width=32``) size on the
            profile picture ``img`` tag
          user (User): current user

        Returns:
          str: HTML, or the empty string if there's no actor
        """
        # function-scoped so that this method brings its own dependency
        from html import escape

        attrs = {'class': 'h-card u-author'}

        if user and user.key in self.users:
            # outbound; show a nice link to the user
            return user.user_link(handle=False, pictures=True)

        proto = PROTOCOLS.get(self.source_protocol)

        actor = None
        if self.as1:
            actor = (as1.get_object(self.as1, 'actor')
                     or as1.get_object(self.as1, 'author'))
            # hydrate from datastore if available
            # TODO: optimize! this is called serially in loops, eg in home.html
            if set(actor.keys()) == {'id'} and self.source_protocol:
                actor_obj = proto.load(actor['id'], remote=False)
                if actor_obj and actor_obj.as1:
                    actor = actor_obj.as1

        if not actor:
            return ''
        elif set(actor.keys()) == {'id'}:
            return common.pretty_link(actor['id'], attrs=attrs, user=user)

        url = as1.get_url(actor)
        name = actor.get('displayName') or actor.get('username') or ''
        img_url = util.get_url(actor, 'image')
        if not image or not img_url:
            return common.pretty_link(url, text=name, attrs=attrs, user=user)

        logo = ''
        if proto:
            # label the logo with the protocol's name, like User.user_link
            # does. self is always an Object, so its class name is useless here.
            logo = f'<span class="logo" title="{proto.__name__}">{proto.LOGO_HTML or proto.LOGO_EMOJI}</span>'

        # str() keeps a missing url rendering the same way that plain
        # interpolation did. escape the name after truncating so that we never
        # cut an entity in half.
        href = escape(str(url))
        title = escape(name)
        src = escape(str(img_url))
        short_name = escape(util.ellipsize(name, chars=40))
        return f"""\
        {logo}
        <a class="h-card u-author" href="{href}" title="{title}">
          <img class="profile" src="{src}" {'width="32"' if sized else ''}/>
          <span style="unicode-bidi: isolate">{short_name}</span>
        </a>"""
1527

1528
    def get_copy(self, proto):
        """Looks up this object's id in a given protocol.

        If ``proto`` is this object's ``source_protocol``, that's just this
        object's own key id. Otherwise, it's the URI of the first entry in
        ``copies`` whose protocol matches ``proto``, or None if there isn't one.

        TODO: for some protocols, we should try harder to find the *right* copy id.
        Eg if copies has some old garbage entries for this protocol, and we can
        tell that they don't belong to the user's copy account in this protocol, eg
        if the DID in the at:// URI doesn't match, we should skip those and look for
        the matching copy. We'd need the user here though.
        This would help with or fix:
        https://console.cloud.google.com/errors/detail/COK22a6w4O2JVg;locations=global;time=P30D?project=bridgy-federated

        Args:
          proto: :class:`Protocol` subclass

        Returns:
          str: id or URI in ``proto``, or None
        """
        # protocols are matched by either their full label or their abbreviation
        labels = (proto.LABEL, proto.ABBREV)

        if self.source_protocol in labels:
            return self.key.id()

        return next((c.uri for c in self.copies if c.protocol in labels), None)
1✔
1554

1555
    def resolve_ids(self):
        """Swaps "copy" ids, subdomain ids, etc for their original ids.

        Afterward, all ids should be original "source" ids, ie in the protocol
        that they first came from.

        Specifically, resolves:

        * ids in :class:`User.copies` and :class:`Object.copies`, eg ATProto
          records and Nostr events that we bridged, to the ids of their
          original objects in their source protocol, eg
          ``at://did:plc:abc/app.bsky.feed.post/123`` => ``https://mas.to/@user/456``.
        * Bridgy Fed subdomain URLs to the ids embedded inside them, eg
          ``https://bsky.brid.gy/ap/did:plc:xyz`` => ``did:plc:xyz``
        * ATProto bsky.app URLs to their DIDs or `at://` URIs, eg
          ``https://bsky.app/profile/a.com`` => ``did:plc:123``

        ...in these AS1 fields, in place:

        * ``id``
        * ``actor``
        * ``author``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``attachments.[objectType=note].id``
        * ``tags.[objectType=mention].url``

        The result is stored in ``our_as1``, and only when something actually
        changed.

        :meth:`protocol.Protocol.translate_ids` is partly the inverse of this.
        Much of the same logic is duplicated there!

        TODO: unify with :meth:`normalize_ids`, :meth:`Object.normalize_ids`.
        """
        if not self.as1:
            return

        # extract ids, strip Bridgy Fed subdomain URLs
        unwrapped = unwrap(self.as1)
        if unwrapped != self.as1:
            self.our_as1 = util.trim_nulls(unwrapped)

        source_proto = PROTOCOLS.get(self.source_protocol)
        if not source_proto:
            return

        logger.debug(f'Resolving ids for {self.key.id()}')
        inner = unwrapped['object'] = as1.get_object(unwrapped)
        replaced = False

        def replace(val, orig_fn):
            """Maps a string id or AS1 dict to its original id, if we know it.

            ``orig_fn`` takes a copy id and returns the original's key, or
            something falsy if there is none.
            """
            nonlocal replaced

            id = val.get('id') if isinstance(val, dict) else val
            if not (id and source_proto.HAS_COPIES):
                # NOTE(review): this returns the bare id, not val, so a dict
                # val collapses to its id (or None) here. confirm that's
                # intended.
                return id

            orig = orig_fn(id)
            if not orig:
                return val

            replaced = True
            orig_id = orig.id()
            logger.debug(f'Resolved copy id {val} to original {orig_id}')

            # keep the dict if it has more in it than just the id
            if isinstance(val, dict) and util.trim_nulls(val).keys() > {'id'}:
                val['id'] = orig_id
                return val

            return orig_id

        # actually replace ids
        #
        # object field could be either object (eg repost) or actor (eg follow)
        unwrapped['object'] = replace(inner, get_original_object_key)
        if not replaced:
            unwrapped['object'] = replace(inner, get_original_user_key)

        id_fields = (
            ('actor', get_original_user_key),
            ('author', get_original_user_key),
            ('inReplyTo', get_original_object_key),
        )

        for obj in unwrapped, inner:
            for tag in as1.get_objects(obj, 'tags'):
                if tag.get('objectType') == 'mention':
                    tag['url'] = replace(tag.get('url'), get_original_user_key)

            for att in as1.get_objects(obj, 'attachments'):
                if att.get('objectType') == 'note':
                    att['id'] = replace(att.get('id'), get_original_object_key)

            for field, fn in id_fields:
                vals = [replace(val, fn) for val in util.get_list(obj, field)]
                # single values are stored bare, not as one-element lists
                obj[field] = vals[0] if len(vals) == 1 else vals

        if replaced:
            self.our_as1 = util.trim_nulls(unwrapped)
1✔
1650

1651
    def normalize_ids(self):
        """Converts ids to their protocol's canonical form, if it has one.

        For example, normalizes ATProto ``https://bsky.app/...`` URLs to DIDs
        for profiles, ``at://`` URIs for posts.

        Works on a deep copy of ``as1`` and stores it in ``our_as1`` only if at
        least one id changed.

        TODO: unify with :meth:`resolve_ids`, :meth:`Protocol.translate_ids`.
        """
        from protocol import Protocol

        if not self.as1:
            return

        logger.debug(f'Normalizing ids for {self.key.id()}')
        outer_obj = copy.deepcopy(self.as1)
        inner_objs = as1.get_objects(outer_obj)
        replaced = False

        def replace(val, translate_fn):
            """Normalizes a string id, or the ``id`` inside an AS1 dict.

            Returns ``val`` itself when there's nothing to normalize.
            """
            nonlocal replaced

            orig = val.get('id') if isinstance(val, dict) else val
            if not orig:
                return val

            proto = Protocol.for_id(orig, remote=False)
            if not proto:
                return val

            # translating from a protocol to itself yields its canonical form
            translated = translate_fn(id=orig, from_=proto, to=proto)
            if not translated or translated == orig:
                return val

            replaced = True
            if not isinstance(val, dict):
                return translated

            val['id'] = translated
            return val

        # actually replace ids
        for obj in (outer_obj, *inner_objs):
            for tag in as1.get_objects(obj, 'tags'):
                if tag.get('objectType') == 'mention':
                    tag['url'] = replace(tag.get('url'), ids.translate_user_id)

            for field in ('actor', 'author', 'inReplyTo'):
                if field == 'inReplyTo':
                    fn = ids.translate_object_id
                else:
                    fn = ids.translate_user_id

                vals = [replace(val, fn) for val in util.get_list(obj, field)]
                # single values are stored bare, not as one-element lists
                obj[field] = vals[0] if len(vals) == 1 else vals

        normalized = outer_obj['object'] = []
        for inner_obj in inner_objs:
            # the inner object is a user if it's an actor itself, or if the
            # outer activity's verb takes an actor as its object
            is_user = (as1.object_type(inner_obj) in as1.ACTOR_TYPES
                       or as1.object_type(outer_obj) in as1.VERBS_WITH_ACTOR_OBJECT)
            translate_fn = (ids.translate_user_id if is_user
                            else ids.translate_object_id)

            got = replace(inner_obj, translate_fn)
            # collapse dicts with only an id down to the bare id
            if isinstance(got, dict) and util.trim_nulls(got).keys() == {'id'}:
                got = got['id']

            normalized.append(got)

        if len(normalized) == 1:
            outer_obj['object'] = normalized[0]

        if replaced:
            self.our_as1 = util.trim_nulls(outer_obj)
1✔
1724

1725

1726
class Follower(ndb.Model):
    """A follower of a Bridgy Fed user."""
    STATUSES = ('active', 'inactive')

    from_ = ndb.KeyProperty(name='from', required=True)
    """The follower."""
    to = ndb.KeyProperty(required=True)
    """The followee, ie the user being followed."""

    follow = ndb.KeyProperty(Object)
    """The last follow activity."""
    status = ndb.StringProperty(choices=STATUSES, default='active')
    """Whether this follow is active or not."""

    created = ndb.DateTimeProperty(auto_now_add=True)
    updated = ndb.DateTimeProperty(auto_now=True)

    # OLD. some stored entities still have these; do not reuse.
    # src = ndb.StringProperty()
    # dest = ndb.StringProperty()
    # last_follow = JsonProperty()

    def _pre_put_hook(self):
        """Checks that the two users are of different kinds before storing."""
        # we're a bridge! stick with bridging.
        assert self.from_.kind() != self.to.kind(), f'from {self.from_} to {self.to}'

    def _post_put_hook(self, future):
        """Logs the key that was just written."""
        logger.debug(f'Wrote {self.key}')

    @classmethod
    def get_or_create(cls, *, from_, to, **kwargs):
        """Loads the Follower for the given ``from_`` and ``to`` users.

        If no matching :class:`Follower` is stored yet, creates and stores one
        with ``kwargs`` as extra property values. If one is stored and
        ``kwargs`` is non-empty, sets those properties on it and stores it
        again.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for much, last writer wins
        is usually fine.

        Args:
          from_ (User)
          to (User)
          kwargs: property values to set on the new or existing entity

        Returns:
          Follower:
        """
        assert from_
        assert to

        existing = Follower.query(
            Follower.from_ == from_.key,
            Follower.to == to.key,
        ).get()

        if not existing:
            created = Follower(from_=from_.key, to=to.key, **kwargs)
            created.put()
            return created

        if kwargs:
            # update existing entity with new property values, eg to make an
            # inactive Follower active again
            for name, value in kwargs.items():
                setattr(existing, name, value)
            existing.put()

        return existing

    @staticmethod
    def fetch_page(collection, user):
        r"""Loads one page of active :class:`Follower`\s for a given user.

        Wraps the module-level :func:`fetch_page`, paging on
        :attr:`Follower.updated`. Paging uses the ``before`` and ``after`` query
        parameters, if available in the request.

        Followers whose other user couldn't be loaded are dropped. For the
        ``followers`` collection, so are followers that ``user.is_enabled``
        rejects.

        Args:
          collection (str): ``followers`` or ``following``
          user (User)

        Returns:
          (list of Follower, str, str) tuple: results, annotated with an extra
          ``user`` attribute that holds the follower or following :class:`User`,
          and new str query param values for ``before`` and ``after`` to fetch
          the previous and next pages, respectively
        """
        assert collection in ('followers', 'following'), collection
        want_followers = collection == 'followers'

        # followers are the entities that point *to* this user; following are
        # the ones that point *from* them
        filter_prop = Follower.to if want_followers else Follower.from_
        query = Follower.query(
            Follower.status == 'active',
            filter_prop == user.key,
        )

        page, before, after = fetch_page(query, Follower, by=Follower.updated)

        # load the user on the other side of each follow
        users = ndb.get_multi(f.from_ if want_followers else f.to for f in page)
        User.load_multi(u for u in users if u)

        for follower, other in zip(page, users):
            follower.user = other

        # skip follows whose other user is missing. for followers, also only
        # show ones in protocols that this user is bridged into
        followers = [
            f for f in page
            if f.user and (not want_followers or user.is_enabled(f.user))
        ]

        return followers, before, after
1✔
1831

1832

1833
def fetch_objects(query, by=None, user=None):
    """Fetches a page of :class:`Object` entities from a datastore query.

    Wraps :func:`fetch_page`, drops objects that are deleted or not public,
    and sets these extra attributes on each remaining :class:`Object` for
    rendering in ``objects.html``:

    * ``phrase``: human-readable verb phrase, eg ``reposted``; linked to the
      object's URL in some cases
    * ``content``: content, display name, or summary; or a synthesized link
      to the inner object
    * ``url``: URL of the object
    * ``inner_url``: URL or id of the inner object. Only set for likes,
      follows, reposts, and objects with no content of their own.

    Args:
      query (ndb.Query)
      by (ndb.model.Property): either :attr:`Object.updated` or
        :attr:`Object.created`
      user (User): current user

    Returns:
      (list of Object, str, str) tuple:
      (results, new ``before`` query param, new ``after`` query param)
      to fetch the previous and next pages, respectively
    """
    assert by is Object.updated or by is Object.created
    objects, new_before, new_after = fetch_page(query, Object, by=by)
    objects = [o for o in objects if as1.is_public(o.as1) and not o.deleted]

    # AS1 verb or object type => human-readable phrase. this is the same for
    # every object, so build it once, outside the loop.
    phrases = {
        'accept': 'accepted',
        'article': 'posted',
        'comment': 'replied',
        'delete': 'deleted',
        'follow': 'followed',
        'invite': 'is invited to',
        'issue': 'filed issue',
        'like': 'liked',
        'note': 'posted',
        'post': 'posted',
        'repost': 'reposted',
        'rsvp-interested': 'is interested in',
        'rsvp-maybe': 'might attend',
        'rsvp-no': 'is not attending',
        'rsvp-yes': 'is attending',
        'share': 'reposted',
        'stop-following': 'unfollowed',
        'undo': 'undid',
        'update': 'updated',
    }
    phrases.update({actor_type: 'profile refreshed:'
                    for actor_type in as1.ACTOR_TYPES})

    # synthesize human-friendly content for objects
    for obj in objects:
        obj_as1 = obj.as1
        obj_type = as1.object_type(obj_as1)
        obj.phrase = phrases.get(obj_type, '')

        raw_content = (obj_as1.get('content')
                       or obj_as1.get('displayName')
                       or obj_as1.get('summary'))
        # plain text version of the content, used below for link text
        content = raw_content
        if content:
            content = util.parse_html(content).get_text()

        urls = as1.object_urls(obj_as1)
        url = urls[0] if urls else None
        if url and not content:
            # heuristics for sniffing URLs and converting them to more friendly
            # phrases and user handles.
            # TODO: standardize this into granary.as2 somewhere?
            from activitypub import FEDI_URL_RE
            from atproto import COLLECTION_TO_TYPE, did_to_handle

            handle = suffix = ''
            if match := FEDI_URL_RE.match(url):
                handle = match.group(2)
                if match.group(4):
                    suffix = "'s post"
            elif match := BSKY_APP_URL_RE.match(url):
                handle = match.group('id')
                if match.group('tid'):
                    suffix = "'s post"
            elif match := AT_URI_PATTERN.match(url):
                handle = match.group('repo')
                if coll := match.group('collection'):
                    suffix = f"'s {COLLECTION_TO_TYPE.get(coll) or 'post'}"
                url = bluesky.at_uri_to_web_url(url)
            elif url.startswith('did:'):
                handle = url
                url = bluesky.Bluesky.user_url(handle)

            if handle:
                if handle.startswith('did:'):
                    handle = did_to_handle(handle) or handle
                content = f'@{handle}{suffix}'

            if url:
                content = common.pretty_link(url, text=content, user=user)

        obj.content = raw_content
        obj.url = as1.get_url(obj_as1)

        if obj_type in ('like', 'follow', 'repost', 'share') or not obj.content:
            inner_as1 = as1.get_object(obj_as1)
            obj.inner_url = as1.get_url(inner_as1) or inner_as1.get('id')
            if obj.url:
                obj.phrase = common.pretty_link(
                    obj.url, text=obj.phrase, attrs={'class': 'u-url'}, user=user)
            if content:
                obj.content = content
                obj.url = url
            elif obj.inner_url:
                obj.content = common.pretty_link(obj.inner_url, max_length=50)

    return objects, new_before, new_after
1✔
1944

1945

1946
def hydrate(activity, fields=('author', 'actor', 'object')):
    """Hydrates fields in an AS1 activity, in place.

    A field is only hydrated if its value is a bare id, ie a string or a dict
    with only ``id``. It's replaced with the stored :class:`Object`'s AS1, once
    the datastore lookup finishes, if that object exists and has AS1.

    Args:
      activity (dict): AS1 activity
      fields (sequence of str): names of fields to hydrate. If they're string ids,
        loads them from the datastore, if possible, and replaces them with their dict
        AS1 objects.

    Returns:
      sequence of :class:`google.cloud.ndb.tasklets.Future`: tasklets for hydrating
        each field. Wait on these before using ``activity``.
    """
    def setter_for(field):
        # binds field now, so that each callback writes to its own field
        def on_done(future):
            loaded = future.result()
            if loaded and loaded.as1:
                activity[field] = loaded.as1
        return on_done

    pending = []

    for field in fields:
        val = as1.get_object(activity, field)
        if not val or not val.keys() <= {'id'}:
            continue

        # TODO: extract a Protocol class method out of User.profile_id,
        # then use that here instead. the catch is that we'd need to
        # determine Protocol for every id, which is expensive.
        #
        # same TODO is in models.fetch_objects
        id = val['id']
        if id.startswith('did:'):
            # for DIDs, look up the ATProto profile record instead
            id = f'at://{id}/app.bsky.actor.profile/self'

        lookup = Object.get_by_id_async(id)
        lookup.add_done_callback(setter_for(field))
        pending.append(lookup)

    return pending
1✔
1984

1985

1986
def fetch_page(query, model_class, by=None):
    """Fetches a page of results from a datastore query.

    Uses the ``before`` and ``after`` query params (if provided; should be
    ISO8601 timestamps) and the ``by`` property to identify the page to fetch.
    Returns at most ``PAGE_SIZE`` results, newest ``updated`` first.

    Args:
      query (google.cloud.ndb.query.Query)
      model_class (class): currently unused
      by (ndb.model.Property): paging property, eg :attr:`Object.updated`
        or :attr:`Object.created`. Required.

    Returns:
      (list of Object or Follower, str, str) tuple: (results, new_before,
      new_after), where new_before and new_after are query param values for
      ``before`` and ``after`` to fetch the previous and next pages,
      respectively

    Raises:
      whatever :func:`oauth_dropins.webutil.flask_util.error` raises, if a
      paging param can't be parsed or if both are provided
    """
    assert by

    # if there's a paging param ('before' or 'after'), update query with it
    # TODO: unify this with Bridgy's user page
    def get_paging_param(param):
        """Returns a query param as a naive datetime, or None if it's not set.

        Timezone-aware values are converted to UTC first.
        """
        val = request.values.get(param)
        if not val:
            return None

        try:
            # presumably the + in a UTC offset arrives URL-decoded as a space
            dt = util.parse_iso8601(val.replace(' ', '+'))
        except Exception as e:
            # not BaseException, so that KeyboardInterrupt, SystemExit, etc
            # propagate instead of turning into HTTP errors. error() is
            # expected to raise, so we don't continue past here.
            error(f"Couldn't parse {param}, {val!r} as ISO8601: {e}")

        if dt.tzinfo:
            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
        return dt

    before = get_paging_param('before')
    after = get_paging_param('after')
    if before and after:
        error("can't handle both before and after")
    elif after:
        query = query.filter(by >= after).order(by)
    elif before:
        query = query.filter(by < before).order(-by)
    else:
        query = query.order(-by)

    # NOTE(review): the sort and the new paging params below use ``updated``
    # even when ``by`` is a different property, eg Object.created. confirm
    # that's intended.
    query_iter = query.iter()
    results = sorted(itertools.islice(query_iter, 0, PAGE_SIZE),
                     key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    has_next = results and query_iter.probably_has_next()
    new_after = (
        before if before
        else results[0].updated if has_next and after
        else None)
    if new_after:
        new_after = new_after.isoformat()

    new_before = (
        after if after else
        results[-1].updated if has_next
        else None)
    if new_before:
        new_before = new_before.isoformat()

    return results, new_before, new_after
1✔
2054

2055

2056
def maybe_truncate_key_id(id):
    """Returns id, truncated to fit in ``_MAX_KEYPART_BYTES`` UTF-8 bytes.

    The limit is in bytes, not characters, so this measures and cuts the UTF-8
    encoding of ``id``. If the cut lands in the middle of a multi-byte
    character, that whole character is dropped, so the result may be slightly
    shorter than the limit. ASCII-only ids are truncated to exactly
    ``_MAX_KEYPART_BYTES`` characters.

    Args:
      id (str)

    Returns:
      str: ``id`` itself if it already fits, otherwise a truncated prefix
    """
    # errors='replace' so that lone surrogates, which can't be encoded as
    # UTF-8, don't raise here
    encoded = id.encode('utf-8', errors='replace')
    if len(encoded) <= _MAX_KEYPART_BYTES:
        return id

    # errors='ignore' drops the partial character at the end, if any
    truncated = encoded[:_MAX_KEYPART_BYTES].decode('utf-8', errors='ignore')
    logger.warning(f'Truncating id {id} to {_MAX_KEYPART_BYTES} bytes: {truncated}')
    return truncated
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc