• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 6bb03b67-8218-41ab-96f6-f6409d110030

29 Nov 2025 06:35PM UTC coverage: 93.012% (+0.04%) from 92.969%
6bb03b67-8218-41ab-96f6-f6409d110030

push

circleci

snarfed
tweaks to id.normalize/translate_user_id, narrow somewhat to just user ids

TODO: what should they return if id is not a valid user id? add and use new is_user_id function?

7 of 7 new or added lines in 2 files covered. (100.0%)

40 existing lines in 4 files now uncovered.

6256 of 6726 relevant lines covered (93.01%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.68
/models.py
1
"""Datastore model classes."""
2
import copy
1✔
3
from datetime import timedelta, timezone
1✔
4
from functools import cached_property, lru_cache
1✔
5
import itertools
1✔
6
import json
1✔
7
import logging
1✔
8
import random
1✔
9
import re
1✔
10
from threading import Lock
1✔
11
from urllib.parse import quote, urlparse
1✔
12
import csv
1✔
13
import io
1✔
14

15
from arroba.util import parse_at_uri
1✔
16
import cachetools
1✔
17
from Crypto.PublicKey import RSA
1✔
18
from flask import request
1✔
19
from google.cloud import ndb
1✔
20
from google.cloud.ndb.key import _MAX_KEYPART_BYTES
1✔
21
from granary import as1, as2, atom, bluesky, microformats2
1✔
22
from granary.bluesky import BSKY_APP_URL_RE
1✔
23
import granary.nostr
1✔
24
from granary.source import html_to_text
1✔
25
import humanize
1✔
26
from lexrpc.base import AT_URI_RE
1✔
27
from oauth_dropins.webutil import util
1✔
28
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
29
from oauth_dropins.webutil.flask_util import error
1✔
30
from oauth_dropins.webutil.models import EncryptedProperty, JsonProperty, StringIdModel
1✔
31
from oauth_dropins.webutil.util import ellipsize, json_dumps, json_loads
1✔
32
from requests import RequestException
1✔
33
import secp256k1
1✔
34

35
import common
1✔
36
from common import (
1✔
37
    base64_to_long,
38
    DOMAIN_BLOCKLIST_CANARIES,
39
    DOMAIN_RE,
40
    long_to_base64,
41
    OLD_ACCOUNT_AGE,
42
    PROTOCOL_DOMAINS,
43
    report_error,
44
    unwrap,
45
)
46
import ids
1✔
47
import memcache
1✔
48

49
# maps string label to Protocol subclass. values are populated by ProtocolUserMeta.
50
# (we used to wait for ProtocolUserMeta to populate the keys as well, but that was
51
# awkward to use in datastore model properties with choices, below; it required
52
# overriding them in reset_model_properties, which was always flaky.)
53
# Every known label starts out mapped to None; ProtocolUserMeta fills in the
# Protocol subclass for each label as those classes are defined.
PROTOCOLS = dict.fromkeys((
    'activitypub',
    'ap',
    'atproto',
    'bsky',
    'nostr',
    'ostatus',
    'web',
    'webmention',
    'ui',
))
# Labels that are only registered when DEBUG is set.
DEBUG_PROTOCOLS = (
    'fa',
    'fake',
    'efake',
    'other',
)
if DEBUG:
    PROTOCOLS.update(dict.fromkeys(DEBUG_PROTOCOLS))
1✔
72

73
# maps string kind (eg 'MagicKey') to Protocol subclass.
74
# populated in ProtocolUserMeta
75
PROTOCOLS_BY_KIND = {}

# 2048 bits makes tests slow, so use 1024 for them
KEY_BITS = 1024 if DEBUG else 2048
PAGE_SIZE = 20

# auto delete most old objects via the Object.expire property
# https://cloud.google.com/datastore/docs/ttl
#
# need to keep follows because we attach them to Followers and use them for
# unfollows
DONT_EXPIRE_OBJECT_TYPES = (
    as1.ACTOR_TYPES
    | as1.POST_TYPES
    | {'block', 'flag', 'follow', 'like', 'share'}
)
OBJECT_EXPIRE_AGE = timedelta(days=90)

GET_ORIGINALS_CACHE_EXPIRATION = timedelta(days=1)
FOLLOWERS_CACHE_EXPIRATION = timedelta(hours=2)

# See https://www.cloudimage.io/
IMAGE_PROXY_URL_BASE = 'https://xaasg3w5.cloudimg.io/'
IMAGE_PROXY_DOMAINS = ('threads.net',)

# Maps each user status to the human-readable reason that is interpolated into
# the "not eligible" DM text. Keep in sync with DM.type!
USER_STATUS_DESCRIPTIONS = {
    'moved': 'account has migrated to another account',
    'no-feed-or-webmention': "web site doesn't have an RSS or Atom feed or webmention endpoint",
    'nobot': "profile has 'nobot' in it",
    'nobridge': "profile has 'nobridge' in it",
    'no-nip05': "account's NIP-05 identifier is missing or invalid",
    'no-profile': 'profile is missing or empty',
    'opt-out': 'account or instance has requested to be opted out',
    'owns-webfinger': 'web site looks like a fediverse instance because it already serves Webfinger',
    'private': 'account is set as private or protected',
    'requires-avatar': "account doesn't have a profile picture",
    'requires-name': "account's name and username are the same",
    'requires-old-account': f"account is less than {humanize.naturaldelta(OLD_ACCOUNT_AGE)} old",
    'unsupported-handle-ap': "<a href='https://fed.brid.gy/docs#fediverse-get-started'>username has characters that Bridgy Fed doesn't currently support</a>",
}

logger = logging.getLogger(__name__)
1✔
114

115

116
class Target(ndb.Model):
    r""":class:`protocol.Protocol` + URI pairs for identifying objects.

    These are currently used for:

    * delivery destinations, eg ActivityPub inboxes, webmention targets, etc.
    * copies of :class:`Object`\s and :class:`User`\s elsewhere,
      eg ``at://`` URIs for ATProto records, nevent etc bech32-encoded Nostr ids,
      ATProto user DIDs, etc.

    Used in :class:`google.cloud.ndb.model.StructuredProperty`\s inside
    :class:`Object` and :class:`User`; not stored as top-level entities in the
    datastore.

    ndb implements this by hoisting each property here into a corresponding
    property on the parent entity, prefixed by the StructuredProperty name
    below, eg ``delivered.uri``, ``delivered.protocol``, etc.

    For repeated StructuredPropertys, the hoisted properties are all repeated on
    the parent entity, and reconstructed into StructuredPropertys based on their
    order.

    https://googleapis.dev/python/python-ndb/latest/model.html#google.cloud.ndb.model.StructuredProperty
    """
    uri = ndb.StringProperty(required=True)
    ''
    protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()), required=True)
    ''

    def __eq__(self, other):
        """Compares by :attr:`uri` and :attr:`protocol` only, ignoring the key.

        Returns:
          bool, or ``NotImplemented`` if ``other`` isn't a :class:`Target`, so
          that ``==`` evaluates to False and ``!=`` to True instead of the
          comparison yielding None.
        """
        if not isinstance(other, Target):
            return NotImplemented
        return self.uri == other.uri and self.protocol == other.protocol

    def __hash__(self):
        """Hashes the same fields as :meth:`__eq__` so these can be dict keys."""
        return hash((self.protocol, self.uri))
1✔
153

154

155
class DM(ndb.Model):
    """:class:`protocol.Protocol` + type pairs for identifying sent DMs.

    Used in :attr:`User.sent_dms`.

    https://googleapis.dev/python/python-ndb/latest/model.html#google.cloud.ndb.model.StructuredProperty
    """
    type = ndb.StringProperty(required=True)
    """Known values (keep in sync with USER_STATUS_DESCRIPTIONS, the subset for
    ineligible users):

      * dms_not_supported-[RECIPIENT-USER-ID]
      * moved
      * no-feed-or-webmention
      * no-nip05
      * no-profile
      * opt-out
      * owns-webfinger
      * private
      * replied_to_bridged_user
      * request_bridging
      * requires-avatar
      * requires-name
      * requires-old-account
      * unsupported-handle-ap
      * welcome
    """
    protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()), required=True)
    ''

    def __eq__(self, other):
        """Compares by :attr:`type` and :attr:`protocol` only, ignoring the key.

        Returns:
          bool, or ``NotImplemented`` if ``other`` isn't a :class:`DM`, instead
          of raising :class:`AttributeError` on operands without those
          attributes.
        """
        if not isinstance(other, DM):
            return NotImplemented
        return self.type == other.type and self.protocol == other.protocol
1✔
188

189

190
class ProtocolUserMeta(type(ndb.Model)):
    """Metaclass for :class:`User`.

    Records each new class in ``PROTOCOLS`` under its ``LABEL``, ``ABBREV``,
    and ``OTHER_LABELS``, and in ``PROTOCOLS_BY_KIND`` under its datastore kind.
    """
    def __new__(meta, name, bases, class_dict):
        cls = super().__new__(meta, name, bases, class_dict)

        primary = getattr(cls, 'LABEL', None)
        # classes without a label, and the 'protocol'/'user' labels, aren't
        # registered
        if not primary or primary in ('protocol', 'user'):
            return cls
        # debug-only protocols are registered only when DEBUG is set
        if not DEBUG and cls.LABEL in DEBUG_PROTOCOLS:
            return cls

        for alias in (primary, cls.ABBREV) + cls.OTHER_LABELS:
            if alias:
                PROTOCOLS[alias] = cls
        PROTOCOLS_BY_KIND[cls._get_kind()] = cls

        return cls
1✔
204

205

206
def reset_protocol_properties():
    """Rebuilds values that are derived from the current ``PROTOCOLS`` dict.

    Overwrites ``common.SUBDOMAIN_BASE_URL_RE`` with a regexp that alternates
    over every ``PROTOCOLS`` label plus ``fed``, and ``ids.COPIES_PROTOCOLS``
    with the labels whose registered class has a truthy ``HAS_COPIES``.
    """
    labels = '|'.join(PROTOCOLS.keys())
    abbrevs = f'({labels}|fed)'
    pattern = rf'^https?://({abbrevs}\.brid\.gy|localhost(:8080)?)/(convert/|r/)?({abbrevs}/)?(?P<path>.+)'
    common.SUBDOMAIN_BASE_URL_RE = re.compile(pattern)

    # labels still mapped to None have no registered class yet, so skip them
    ids.COPIES_PROTOCOLS = tuple(
        label for label, proto in PROTOCOLS.items()
        if proto and proto.HAS_COPIES)
213

214

215
@lru_cache(maxsize=100000)
@memcache.memoize(expire=GET_ORIGINALS_CACHE_EXPIRATION)
def get_original_object_key(copy_id):
    """Looks up the :class:`Object` that has ``copy_id`` in its ``copies``.

    Results are cached both in process (``lru_cache``) and via
    :func:`memcache.memoize`. Note that :meth:`Object.add` also updates this
    function's :func:`memcache.memoize` cache.

    Args:
      copy_id (str): must be non-empty

    Returns:
      google.cloud.ndb.Key or None
    """
    assert copy_id

    matching = Object.query(Object.copies.uri == copy_id)
    return matching.get(keys_only=True)
1✔
232

233

234
@lru_cache(maxsize=100000)
@memcache.memoize(expire=GET_ORIGINALS_CACHE_EXPIRATION)
def get_original_user_key(copy_id):
    """Finds the user with a given copy id, if any.

    Queries each registered protocol's users, other than ``ui`` and protocols
    whose ``owns_id`` is truthy for ``copy_id``, for one with ``copy_id`` in
    its ``copies``.

    Note that :meth:`User.add` also updates this function's
    :func:`memcache.memoize` cache.

    Args:
      copy_id (str): must be non-empty

    Returns:
      google.cloud.ndb.Key or None
    """
    assert copy_id

    # PROTOCOLS maps several labels (LABEL, ABBREV, OTHER_LABELS) to the same
    # class, so dedupe, preserving order, to avoid repeating identical queries.
    for proto in dict.fromkeys(PROTOCOLS.values()):
        if not proto or proto.LABEL == 'ui' or proto.owns_id(copy_id):
            continue
        if orig := proto.query(proto.copies.uri == copy_id).get(keys_only=True):
            return orig

    return None
1✔
254

255

256
class AddRemoveMixin:
    """Mixin class that defines the :meth:`add` and :meth:`remove` methods.

    If a subclass of this mixin defines the ``GET_ORIGINAL_FN`` class-level
    attribute, that function's memoize cache is updated when :meth:`add` adds to
    the ``copies`` property and cleared when :meth:`remove` is called with it.
    Subclasses that don't define it skip the cache handling.
    """

    lock = None
    """Synchronizes :meth:`add`, :meth:`remove`, etc."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.lock = Lock()

    def add(self, prop, val):
        """Adds a value to a multiply-valued property.

        Args:
          prop (str)
          val

        Returns:
          True if val was added, ie it wasn't already in prop, False otherwise
        """
        with self.lock:
            added = util.add(getattr(self, prop), val)

        if prop == 'copies' and added:
            # GET_ORIGINAL_FN is optional, so default to None
            if fn := getattr(self, 'GET_ORIGINAL_FN', None):
                memcache.pickle_memcache.set(memcache.memoize_key(fn, val.uri),
                                             self.key)

        return added

    def remove(self, prop, val):
        """Removes a value from a multiply-valued property.

        No-op on the property itself if ``val`` isn't in it.

        Args:
          prop (str)
          val
        """
        with self.lock:
            existing = getattr(self, prop)
            if val in existing:
                existing.remove(val)

        if prop == 'copies':
            self.clear_get_original_cache(val.uri)

    def remove_copies_on(self, proto):
        """Removes all copies on a given protocol.

        ``proto.HAS_COPIES`` must be True.

        Args:
          proto (protocol.Protocol subclass)
        """
        assert proto.HAS_COPIES

        # iterate over a snapshot: remove() mutates self.copies, and mutating a
        # list while iterating over it skips the element after each removal
        for target in list(self.copies):
            if target.protocol in (proto.ABBREV, proto.LABEL):
                self.remove('copies', target)

    @classmethod
    def clear_get_original_cache(cls, uri):
        """Deletes ``uri``'s entry from ``GET_ORIGINAL_FN``'s memoize cache.

        No-op if this class doesn't define ``GET_ORIGINAL_FN``.

        Args:
          uri (str)
        """
        if fn := getattr(cls, 'GET_ORIGINAL_FN', None):
            memcache.pickle_memcache.delete(memcache.memoize_key(fn, uri))
1✔
324

325

326
# WARNING: AddRemoveMixin *must* be before StringIdModel here so that its __init__
327
# gets called! Due to an (arguable) ndb.Model bug:
328
# https://github.com/googleapis/python-ndb/issues/1025
329
class User(AddRemoveMixin, StringIdModel, metaclass=ProtocolUserMeta):
1✔
330
    """Abstract base class for a Bridgy Fed user.
331

332
    Stores some protocols' keypairs. Currently:
333

334
    * RSA keypair for ActivityPub HTTP Signatures
335
      properties: ``mod``, ``public_exponent``, ``private_exponent``, all
336
      encoded as base64url (ie URL-safe base64) strings as described in RFC
337
      4648 and section 5.1 of the Magic Signatures spec:
338
      https://tools.ietf.org/html/draft-cavage-http-signatures-12
339
    * *Not* K-256 signing or rotation keys for AT Protocol, those are stored in
340
      :class:`arroba.datastore_storage.AtpRepo` entities
341
    """
342
    GET_ORIGINAL_FN = get_original_user_key
1✔
343
    'used by AddRemoveMixin'
1✔
344

345
    obj_key = ndb.KeyProperty(kind='Object')  # user profile
1✔
346
    ''
1✔
347
    use_instead = ndb.KeyProperty()
1✔
348
    ''
1✔
349

350
    copies = ndb.StructuredProperty(Target, repeated=True)
1✔
351
    """Proxy copies of this user elsewhere, eg DIDs for ATProto records, bech32
1✔
352
    npub Nostr ids, etc. Similar to ``rel-me`` links in microformats2,
353
    ``alsoKnownAs`` in DID docs (and now AS2), etc.
354
    """
355

356
    mod = ndb.StringProperty()
1✔
357
    """Part of the bridged ActivityPub actor's private key."""
1✔
358
    public_exponent = ndb.StringProperty()
1✔
359
    """Part of the bridged ActivityPub actor's private key."""
1✔
360
    private_exponent = ndb.StringProperty()
1✔
361
    """Part of the bridged ActivityPub actor's private key."""
1✔
362
    nostr_key_bytes = EncryptedProperty()
1✔
363
    """The bridged Nostr account's secp256k1 private key, in raw bytes."""
1✔
364

365
    manual_opt_out = ndb.BooleanProperty()
1✔
366
    """Set to True to manually disable this user. Set to False to override spam filters and forcibly enable this user."""
1✔
367

368
    enabled_protocols = ndb.StringProperty(repeated=True,
1✔
369
                                           choices=list(PROTOCOLS.keys()))
370
    """Protocols that this user has explicitly opted into.
1✔
371

372
    Protocols that don't require explicit opt in are omitted here.
373
    """
374

375
    sent_dms = ndb.StructuredProperty(DM, repeated=True)
1✔
376
    """DMs that we've attempted to send to this user."""
1✔
377

378
    send_notifs = ndb.StringProperty(default='all', choices=('all', 'none'))
1✔
379
    """Which notifications we should send this user."""
1✔
380

381
    blocks = ndb.KeyProperty(kind='Object', repeated=True)
1✔
382
    ''
1✔
383

384
    created = ndb.DateTimeProperty(auto_now_add=True)
1✔
385
    ''
1✔
386
    updated = ndb.DateTimeProperty(auto_now=True)
1✔
387
    ''
1✔
388

389
    # `existing` attr is set by get_or_create
390

391
    # OLD. some stored entities still have these; do not reuse.
392
    # direct = ndb.BooleanProperty(default=False)
393
    # actor_as2 = JsonProperty()
394
    # protocol-specific state
395
    # atproto_notifs_indexed_at = ndb.TextProperty()
396
    # atproto_feed_indexed_at = ndb.TextProperty()
397

398
    def __init__(self, **kwargs):
        """Constructor.

        Pulls ``obj`` out of ``kwargs`` and assigns it through the :attr:`obj`
        setter after the base constructor runs, because however
        :class:`google.cloud.ndb.model.Model` sets it doesn't work with
        ``@property`` and ``@obj.setter`` below.
        """
        profile = kwargs.pop('obj', None)
        super().__init__(**kwargs)

        if profile:
            self.obj = profile
1✔
410

411
    @classmethod
    def new(cls, **kwargs):
        """Always raises; meant to discourage instantiating this base class.

        Use subclasses instead.

        Raises:
          NotImplementedError: always
        """
        raise NotImplementedError()
×
415

416
    def _post_put_hook(self, future):
        """Logs this entity's key at debug level. ``future`` is unused."""
        logger.debug(f'Wrote {self.key}')
1✔
418

419
    @classmethod
    def get_by_id(cls, id, allow_opt_out=False, **kwargs):
        """Loads a user by id, following ``use_instead`` and checking ``status``.

        Args:
          id (str)
          allow_opt_out (bool): whether to return the user even if it has a
            :attr:`status`
          kwargs: passed through to ``_get_by_id``

        Returns:
          User or None: None if the user doesn't exist, or if it has a status
          and ``allow_opt_out`` is False
        """
        found = cls._get_by_id(id, **kwargs)

        # follow a single level of use_instead redirection
        if found and found.use_instead:
            logger.debug(f'{found.key} use_instead => {found.use_instead}')
            found = found.use_instead.get()

        if not found:
            return None

        if found.status and not allow_opt_out:
            logger.info(f'{found.key} is {found.status}')
            return None

        return found
1✔
438

439
    @classmethod
    def get_or_create(cls, id, propagate=False, allow_opt_out=False,
                      reload=False, **kwargs):
        """Loads and returns a :class:`User`. Creates it if necessary.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for much, last writer wins
        is usually fine.

        If no user with this id exists but another user has it as a copy id,
        that original user is returned instead of creating a new one.

        Args:
          propagate (bool): whether to create copies of this user in push-based
            protocols, eg ATProto and Nostr.
          allow_opt_out (bool): whether to allow and create the user if they're
            currently opted out
          reload (bool): whether to reload profile always, vs only if necessary
          kwargs: passed through to ``cls`` constructor

        Returns:
          User: existing or new user, or None if the user is opted out
        """
        assert cls != User

        def put_or_error(entity):
            # store the entity; an AssertionError from put() is reported as a
            # bad id via error()
            try:
                entity.put()
            except AssertionError as e:
                error(f'Bad {cls.__name__} id {id} : {e}')

        user = cls.get_by_id(id, allow_opt_out=True)
        if user:  # existing
            if reload:
                user.reload_profile(gateway=True, raise_=False)

            if user.status and not allow_opt_out:
                return None
            user.existing = True

            # TODO: propagate more fields?
            changed = False
            for field in ('obj', 'obj_key'):
                current = getattr(user, field, None)
                incoming = kwargs.get(field)
                # only fill in fields that are currently unset
                if current is None and incoming is not None:
                    setattr(user, field, incoming)
                    changed = True

            extra_protocols = kwargs.get('enabled_protocols')
            if extra_protocols:
                user.enabled_protocols = (set(user.enabled_protocols)
                                          | set(extra_protocols))
                changed = True

            if not propagate:
                if changed:
                    put_or_error(user)
                return user

        else:  # new, not existing
            orig_key = get_original_user_key(id)
            if orig_key:
                orig = orig_key.get()
                if orig.status and not allow_opt_out:
                    return None
                orig.existing = False
                return orig

            user = cls(id=id, **kwargs)
            user.existing = False
            try:
                user.reload_profile(gateway=True, raise_=False)
            except AssertionError as e:
                error(f'Bad {cls.__name__} id {id} : {e}')

            if user.status and not allow_opt_out:
                return None

        if propagate and user.status in (None, 'private'):
            candidates = user.enabled_protocols + list(user.DEFAULT_ENABLED_PROTOCOLS)
            for label in candidates:
                proto = PROTOCOLS[label]
                if proto == cls:
                    continue
                if not proto.HAS_COPIES:
                    continue

                if user.get_copy(proto) or not user.is_enabled(proto):
                    logger.debug(f'{proto.LABEL} not enabled or user copy already exists, skipping propagate')
                    continue

                try:
                    proto.create_for(user)
                except (ValueError, AssertionError):
                    logger.info(f'failed creating {proto.LABEL} copy',
                                exc_info=True)
                    user.remove('enabled_protocols', proto.LABEL)

        put_or_error(user)

        logger.debug(('Updated ' if user.existing else 'Created new ') + str(user))
        return user
1✔
533

534
    @property
    def obj(self):
        """This user's profile object, loaded from :attr:`obj_key` on first use.

        The loaded object is kept in ``_obj`` for later accesses.

        Returns:
          Object or None: None if :attr:`obj_key` is unset
        """
        if not self.obj_key:
            return None

        if not hasattr(self, '_obj'):
            self._obj = self.obj_key.get()
        return self._obj

    @obj.setter
    def obj(self, obj):
        """Sets both the stored object and :attr:`obj_key`; falsy clears both."""
        if not obj:
            self._obj = self.obj_key = None
            return

        assert isinstance(obj, Object)
        assert obj.key
        self._obj = obj
        self.obj_key = obj.key
1✔
551

552
    def delete(self, proto=None):
        """Deletes a user's bridged actors in all protocols or a specific one.

        Builds an AS1 ``delete`` activity with this user as both actor and
        object, and hands it to :meth:`deliver`.

        Args:
          proto (Protocol): optional
        """
        timestamp = util.now().isoformat()
        scope = proto.LABEL if proto else 'all'
        delete_id = f'{self.profile_id()}#bridgy-fed-delete-user-{scope}-{timestamp}'

        activity = {
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': self.key.id(),
            'object': self.key.id(),
        }
        delete = Object(id=delete_id, source_protocol=self.LABEL,
                        our_as1=activity)
        self.deliver(delete, from_user=self, to_proto=proto)
1✔
569

570
    @classmethod
    def load_multi(cls, users):
        """Loads :attr:`obj` for multiple users with one ``ndb.get_multi`` call.

        Users with no :attr:`obj_key`, or whose object wasn't found, get
        ``_obj`` set to None.

        Args:
          users (sequence of User)
        """
        profile_keys = (u.obj_key for u in users if u.obj_key)

        by_key = {}
        for loaded in ndb.get_multi(profile_keys):
            if loaded:
                by_key[loaded.key] = loaded

        for u in users:
            u._obj = by_key.get(u.obj_key)
1✔
582

583
    @ndb.ComputedProperty
    def handle(self):
        """This user's unique, human-chosen handle, eg ``@me@snarfed.org``.

        Abstract here; subclasses are expected to implement it.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
×
590

591
    @ndb.ComputedProperty
    def handle_as_domain(self):
        """:attr:`handle` converted by :func:`ids.handle_as_domain`.

        Returns:
          str or None: if handle is None
        """
        handle = self.handle
        return ids.handle_as_domain(handle)
1✔
599

600
    @ndb.ComputedProperty
    def status(self):
        """Whether this user is blocked or opted out.

        Optional. See :attr:`USER_STATUS_DESCRIPTIONS` for possible values.
        The checks run in a fixed order and the first one that matches wins.

        Returns:
          str or None: None if no check matches
        """
        # manual_opt_out is tri-state: True disables, False force-enables,
        # None falls through to the profile-based checks below
        if self.manual_opt_out:
            return 'opt-out'
        if self.manual_opt_out is False:
            return None

        # TODO: require profile for more protocols? all?
        if not self.obj or not self.obj.as1:
            return None

        actor = self.obj.as1

        if actor.get('bridgeable') is False:  # FEP-0036
            return 'opt-out'

        if self.REQUIRES_AVATAR and not actor.get('image'):
            return 'requires-avatar'

        display_name = actor.get('displayName')
        if self.REQUIRES_NAME and (
                not display_name
                or display_name in (self.handle, self.key.id())):
            return 'requires-name'

        if self.REQUIRES_OLD_ACCOUNT:
            published = actor.get('published')
            if published:
                age = util.now() - util.parse_iso8601(published)
                if age < OLD_ACCOUNT_AGE:
                    return 'requires-old-account'

        # https://swicg.github.io/miscellany/#movedTo
        # https://docs.joinmastodon.org/spec/activitypub/#as
        if actor.get('movedTo'):
            return 'moved'

        summary = html_to_text(actor.get('summary', ''), ignore_links=True)
        name = html_to_text(actor.get('displayName', ''), ignore_links=True)

        # #nobridge overrides enabled_protocols
        if '#nobridge' in summary or '#nobridge' in name:
            return 'nobridge'

        # user has explicitly opted in. should go after spam filter (REQUIRES_*)
        # checks, but before is_public and #nobot
        #
        # !!! WARNING: keep in sync with User.enable_protocol!
        if self.enabled_protocols:
            return None

        if not as1.is_public(actor, unlisted=False):
            return 'private'

        # enabled_protocols overrides #nobot
        if '#nobot' in summary or '#nobot' in name:
            return 'nobot'

        return None
1✔
655

656
    def is_enabled(self, to_proto, explicit=False):
        """Returns True if this user can be bridged to a given protocol.

        Reasons this might return False:
        * We haven't turned on bridging these two protocols yet.
        * The user is opted out or blocked.
        * The user is on a domain that's opted out or blocked.
        * The from protocol requires opt in, and the user hasn't opted in.
        * ``explicit`` is True, and this protocol supports ``to_proto`` by
          default, but the user hasn't explicitly opted into it.

        Args:
          to_proto (Protocol subclass)
          explicit (bool): if True, being in ``DEFAULT_ENABLED_PROTOCOLS``
            isn't enough; the user must have ``to_proto`` in
            :attr:`enabled_protocols`

        Returns:
          bool:
        """
        from protocol import Protocol
        assert isinstance(to_proto, Protocol) or issubclass(to_proto, Protocol)

        # a user is always enabled for their own protocol
        if self.__class__ == to_proto:
            return True

        to_label = to_proto.LABEL

        # if for_bridgy_subdomain matches this user's id, they're enabled for
        # every protocol except the matched one
        if bot_protocol := Protocol.for_bridgy_subdomain(self.key.id()):
            return to_proto != bot_protocol

        if self.manual_opt_out:
            return False

        # explicit opt in is checked before status, so it takes precedence
        if to_label in self.enabled_protocols:
            return True

        if self.status:
            return False

        return to_label in self.DEFAULT_ENABLED_PROTOCOLS and not explicit
1✔
698

699
    def enable_protocol(self, to_proto):
        """Adds ``to_proto`` to :attr:`enabled_protocols`.

        If the user has a blocking :attr:`status`, or their handle can't be
        translated to ``to_proto``, DMs them the reason via
        :func:`dms.maybe_send` and calls ``common.error`` with status 299
        (presumably raising, which would stop here).

        Otherwise creates the user's copy in ``to_proto`` if that protocol has
        copies, and, if the protocol wasn't already enabled, adds it, sends a
        welcome DM, and stores the user.

        Args:
          to_proto (:class:`protocol.Protocol` subclass)
        """
        import dms

        # explicit opt-in overrides some status
        # !!! WARNING: keep in sync with User.status!
        ineligible = """Hi! Your account isn't eligible for bridging yet because your {desc}. <a href="https://fed.brid.gy/docs#troubleshooting">More details here.</a> You can try again once that's fixed by unfollowing and re-following this account."""
        status = self.status
        if status and status not in ('nobot', 'private'):
            desc = USER_STATUS_DESCRIPTIONS.get(status)
            if desc:
                dms.maybe_send(from_=to_proto, to_user=self, type=status,
                               text=ineligible.format(desc=desc))
            common.error(f'Nope, user {self.key.id()} is {status}', status=299)

        # checks that the handle can be translated; the result is unused
        try:
            self.handle_as(to_proto)
        except ValueError as e:
            dms.maybe_send(from_=to_proto, to_user=self,
                           type=f'unsupported-handle-{to_proto.ABBREV}',
                           text=ineligible.format(desc=e))
            common.error(str(e), status=299)

        label = to_proto.LABEL
        if label in ids.COPIES_PROTOCOLS:
            # do this even if there's an existing copy since we might need to
            # reactivate it, which create_for should do
            to_proto.create_for(self)

        if label not in self.enabled_protocols:
            self.enabled_protocols.append(label)
            dms.maybe_send(from_=to_proto, to_user=self, type='welcome', text=f"""Welcome to Bridgy Fed! Your account will soon be bridged to {to_proto.PHRASE} at {self.user_link(proto=to_proto, name=False)}. <a href="https://fed.brid.gy/docs">See the docs</a> and <a href="https://{common.PRIMARY_DOMAIN}{self.user_page_path()}">your user page</a> for more information. To disable this and delete your bridged profile, block this account.""")
            self.put()

        logger.info(f'Enabled {to_proto.LABEL} for {self.key.id()} : {self.user_page_path()}')
1✔
739

740
    def disable_protocol(self, to_proto):
        """Removes ``to_proto`` from :attr:`enabled_protocols` and stores the user.

        Args:
          to_proto (:class:`protocol.Protocol` subclass)
        """
        label = to_proto.LABEL
        self.remove('enabled_protocols', label)
        self.put()

        logger.info(f'Disabled {to_proto.LABEL} for {self.key.id()} : {self.user_page_path()}')
1✔
750

751
    def handle_as(self, to_proto, short=False):
        """Returns this user's handle in a different protocol.

        Args:
          to_proto (str or Protocol): a string is looked up in ``PROTOCOLS``
          short (bool): whether to return the full handle or a shortened form.
            Default False. Currently only affects ActivityPub; returns just
            ``@[user]`` instead of ``@[user]@[domain]``

        Returns:
          str or None: None if this user has no handle to translate
        """
        if isinstance(to_proto, str):
            to_proto = PROTOCOLS[to_proto]

        # override to-ATProto to use custom domain handle in DID doc
        from atproto import ATProto, did_to_handle
        if to_proto == ATProto:
            did = self.get_copy(ATProto)
            custom = did_to_handle(did, remote=False) if did else None
            if custom:
                return custom

        # override web users to always use domain instead of custom username
        # TODO: fall back to id if handle is unset?
        if self.LABEL == 'web':
            source_handle = self.key.id()
        else:
            source_handle = self.handle

        if not source_handle:
            return None

        return ids.translate_handle(handle=source_handle, from_=self.__class__,
                                    to=to_proto, short=short)
781

782
    def id_as(self, to_proto):
        """Translates this user's id into another protocol's id format.

        Args:
          to_proto (str or Protocol): target protocol, or its key in ``PROTOCOLS``

        Returns:
          str
        """
        target = PROTOCOLS[to_proto] if isinstance(to_proto, str) else to_proto
        return ids.translate_user_id(id=self.key.id(), from_=self.__class__,
                                     to=target)
796

797
    def handle_or_id(self):
        """Returns this user's handle if it's set, otherwise their key id."""
        handle = self.handle
        if handle:
            return handle
        return self.key.id()
1✔
800

801
    def public_pem(self):
        """Exports this user's ActivityPub public RSA key in PEM format.

        Generates and stores a new keypair first if the user doesn't have one.

        Returns:
          bytes:
        """
        self._maybe_generate_ap_key()
        modulus = base64_to_long(str(self.mod))
        public_exp = base64_to_long(str(self.public_exponent))
        return RSA.construct((modulus, public_exp)).exportKey(format='PEM')
1✔
811

812
    def private_pem(self):
        """Exports this user's ActivityPub private RSA key in PEM format.

        Generates and stores a new keypair first if the user doesn't have one.

        Returns:
          bytes:
        """
        self._maybe_generate_ap_key()
        components = tuple(
            base64_to_long(str(part))
            for part in (self.mod, self.public_exponent, self.private_exponent))
        return RSA.construct(components).exportKey(format='PEM')
1✔
823

824
    def _maybe_generate_ap_key(self):
1✔
825
        """Generates this user's ActivityPub private key if necessary."""
826
        if not self.public_exponent or not self.private_exponent or not self.mod:
1✔
827
            logger.info(f'generating AP keypair for {self.key}')
1✔
828
            assert (not self.public_exponent and not self.private_exponent
1✔
829
                    and not self.mod), id
830
            key = RSA.generate(KEY_BITS, randfunc=random.randbytes if DEBUG else None)
1✔
831
            self.mod = long_to_base64(key.n)
1✔
832
            self.public_exponent = long_to_base64(key.e)
1✔
833
            self.private_exponent = long_to_base64(key.d)
1✔
834
            self.put()
1✔
835

836
    def nsec(self):
        """Returns this user's Nostr private secp256k1 key, bech32-encoded.

        Generates and stores a new key first if the user doesn't have one.

        Returns:
          str:
        """
        self._maybe_generate_nostr_key()
        serialized = secp256k1.PrivateKey(self.nostr_key_bytes, raw=True).serialize()
        return granary.nostr.bech32_encode('nsec', serialized)
1✔
845

846
    def hex_pubkey(self):
        """Returns this user's Nostr public secp256k1 key, hex-encoded.

        Generates and stores a new private key first if the user doesn't have
        one.

        Returns:
          str:
        """
        self._maybe_generate_nostr_key()
        privkey_hex = self.nostr_key_bytes.hex()
        return granary.nostr.pubkey_from_privkey(privkey_hex)
1✔
854

855
    def npub(self):
        """Returns this user's Nostr public secp256k1 key, bech32-encoded.

        Returns:
          str:
        """
        pubkey = self.hex_pubkey()
        return granary.nostr.bech32_encode('npub', pubkey)
1✔
862

863
    def _maybe_generate_nostr_key(self):
1✔
864
        """Generates this user's Nostr private key if necessary."""
865
        if not self.nostr_key_bytes:
1✔
866
            logger.info(f'generating Nostr keypair for {self.key}')
1✔
867
            self.nostr_key_bytes = secp256k1.PrivateKey().private_key
1✔
868
            self.put()
1✔
869

870
    def name(self):
        """Returns this user's human-readable name, eg ``Ryan Barrett``.

        Uses the profile object's AS1 ``displayName`` if it's set, otherwise
        falls back to :meth:`handle_or_id`.
        """
        profile = self.obj.as1 if self.obj else None
        display_name = profile.get('displayName') if profile else None
        return display_name if display_name else self.handle_or_id()
1✔
878

879
    def web_url(self):
        """Returns this user's user-facing profile page URL.

        ...eg ``https://bsky.app/profile/snarfed.org`` or ``https://foo.com/``.

        To be implemented by subclasses. This base implementation always raises.

        Returns:
          str

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
×
890

891
    def is_web_url(self, url, ignore_www=False):
        """Returns True if the given URL is this user's web URL (homepage).

        Surrounding whitespace, trailing slashes, a leading ``www.`` after the
        scheme, and http vs https are all ignored. A bare domain that equals
        the web URL's host also matches.

        Args:
          url (str)
          ignore_www (bool): NOTE(review): currently never read; ``www.`` is
            stripped from both URLs regardless of this value

        Returns:
          bool:
        """
        if not url:
            return False

        def strip_www(val):
            return re.sub(r'^(https?://)www\.', r'\1', val)

        candidate = strip_www(url.strip().rstrip('/'))
        candidate_parts = urlparse(candidate)
        if candidate_parts.scheme not in ('http', 'https', ''):
            return False

        mine = strip_www(self.web_url().rstrip('/'))
        mine_parts = urlparse(mine)

        if candidate == mine or candidate == mine_parts.netloc:
            return True

        # compare everything except the scheme, ie ignore http vs https
        return candidate_parts[1:] == mine_parts[1:]
916

917
    def id_uri(self):
        """Returns this user's id in URI form.

        This default implementation returns the key id unchanged, which is
        right when the id is already a URI, eg ActivityPub actor ids. Per the
        examples here, some protocols differ, eg at://did:plc:... for ATProto
        users and https://site.com for Web users.

        Returns:
          str
        """
        user_id = self.key.id()
        return user_id
1✔
928

929
    def profile_id(self):
        """Returns the id of this user's profile object in its native protocol.

        Delegates to ``ids.profile_id`` with this user's key id.

        Examples:

        * Web: home page URL, eg ``https://me.com/``
        * ActivityPub: actor URL, eg ``https://instance.com/users/me``
        * ATProto: profile AT URI, eg ``at://did:plc:123/app.bsky.actor.profile/self``

        Returns:
          str or None:
        """
        user_id = self.key.id()
        return ids.profile_id(id=user_id, proto=self)
1✔
944

945
    def is_profile(self, obj):
        """Returns True if ``obj`` is this user's profile/actor, False otherwise.

        ``obj`` matches if its key id is this user's key id, this user's
        :meth:`profile_id`, or the id of :attr:`obj_key`, if that's set.

        Args:
          obj (Object)

        Returns:
          bool:
        """
        obj_id = obj.key.id()
        if obj_id in (self.key.id(), self.profile_id()):
            return True

        # always return a real bool; this used to fall off the end and return
        # None when nothing matched
        return bool(self.obj_key) and obj_id == self.obj_key.id()
1✔
959

960
    def reload_profile(self, **kwargs):
        """Re-fetches this user's profile from their native protocol.

        Loads :meth:`profile_id` with ``remote=True``. If that returns an
        object, stores it in ``self.obj``. Always writes this user afterward.

        Args:
          kwargs: passed through to :meth:`Protocol.load`
        """
        profile = self.load(self.profile_id(), remote=True, **kwargs)
        if profile:
            profile_type = profile.type
            if profile_type:
                assert profile_type in as1.ACTOR_TYPES, profile_type
            self.obj = profile

        # write the user so that we re-populate any computed properties
        self.put()
1✔
976

977
    def user_page_path(self, rest=None, prefer_id=False):
        """Builds the path to this user's Bridgy Fed user page.

        The path is ``/[ABBREV]/[handle or id]``, optionally followed by
        ``rest``. A ``/`` separator is inserted unless ``rest`` already starts
        with ``/`` or ``?``.

        Args:
          rest (str): additional path and/or query to add to the end
          prefer_id (bool): whether to use the account's id in the path instead
            of its handle. Defaults to ``False``.

        Returns:
          str
        """
        ident = self.key.id() if prefer_id else self.handle_or_id()
        pieces = [f'/{self.ABBREV}/{ident}']

        if rest:
            if not rest.startswith(('?', '/')):
                pieces.append('/')
            pieces.append(rest)

        return ''.join(pieces)
1✔
993

994
    def get_copy(self, proto):
        """Returns the id of this user's copy in the given protocol.

        If ``proto`` is this user's own protocol, returns this user's key id.
        Otherwise returns the ``uri`` of the first entry in :attr:`copies`
        whose protocol is ``proto``'s ``LABEL`` or ``ABBREV``.

        Args:
          proto: :class:`Protocol` subclass

        Returns:
          str: or None if no such copy exists
        """
        # compare labels instead of using isinstance because the testutil Fake
        # protocol has subclasses
        if proto.LABEL == self.LABEL:
            return self.key.id()

        matches = (target.uri for target in self.copies
                   if target.protocol in (proto.LABEL, proto.ABBREV))
        return next(matches, None)
1✔
1013

1014
    def user_link(self, name=True, handle=True, pictures=False, logo=None,
                  proto=None, proto_fallback=False):
        """Returns a pretty HTML link to the user's profile.

        Can optionally include display name, handle, profile
        picture, and/or link to a different protocol that they've enabled.

        TODO: unify with :meth:`Object.actor_link`?

        NOTE(review): the name, handle, URL, and picture URL are interpolated
        into the HTML as is, without escaping. Confirm that callers only
        render this where that's safe, or escape here.

        Args:
          name (bool): include display name
          handle (bool): True to include handle, False to exclude it, ``'short'``
            to include a shortened version, if available
          pictures (bool): include profile picture and protocol logo
          logo (str): optional path to platform logo to show instead of the
            protocol's default
          proto (protocol.Protocol): link to this protocol instead of the user's
            native protocol
          proto_fallback (bool): if True, and ``proto`` is provided and has
            no canonical profile URL for bridged users, uses the user's profile
            URL in their native protocol

        Returns:
          str: HTML snippet

        Raises:
          AssertionError: if ``proto`` is provided but isn't enabled for this user
        """
        img = name_str = full_handle = handle_str = dot = logo_html = a_open = a_close = ''

        if proto:
            assert self.is_enabled(proto), f"{proto.LABEL} isn't enabled"
            url = proto.bridged_web_url_for(self, fallback=proto_fallback)
        else:
            proto = self.__class__
            url = self.web_url()

        if pictures:
            if logo:
                logo_html = f'<img class="logo" src="{logo}" /> '
            else:
                logo_html = f'<span class="logo" title="{proto.__name__}">{proto.LOGO_HTML or proto.LOGO_EMOJI}</span> '
            if pic := self.profile_picture():
                img = f'<img src="{pic}" class="profile"> '

        if handle:
            full_handle = self.handle_as(proto) or ''
            # only translate the handle a second time when the short form was
            # requested; otherwise the displayed handle is the full handle
            if handle == 'short':
                handle_str = self.handle_as(proto, short=True) or ''
            else:
                handle_str = full_handle

        if name:
            # look up the name once; skip it if it would just repeat the handle
            display_name = self.name()
            if display_name != full_handle:
                name_str = display_name or ''

        if handle_str and name_str:
            dot = ' &middot; '

        if url:
            a_open = f'<a class="h-card u-author mention" rel="me" href="{url}" title="{name_str}{dot}{full_handle}">'
            a_close = '</a>'

        name_html = f'<span style="unicode-bidi: isolate">{ellipsize(name_str, chars=40)}</span>' if name_str else ''
        return f'{logo_html}{a_open}{img}{name_html}{dot}{ellipsize(handle_str, chars=40)}{a_close}'
1✔
1069

1070
    def profile_picture(self):
        """Returns the user's profile picture image URL, if available, or None.

        Read from the ``image`` field of the profile object's AS1.
        """
        profile = self.obj.as1 if self.obj else None
        if not profile:
            return None
        return util.get_url(profile, 'image')
1✔
1074

1075
    # can't use functools.lru_cache here because we want the cache key to be
1076
    # just the user id, not the whole entity
1077
    @cachetools.cached(
        cachetools.TTLCache(50000, FOLLOWERS_CACHE_EXPIRATION.total_seconds()),
        key=lambda user: user.key.id(), lock=Lock())
    @memcache.memoize(key=lambda self: self.key.id(),
                      expire=FOLLOWERS_CACHE_EXPIRATION)
    def count_followers(self):
        """Counts this user's active followers and followings.

        Both decorators cache the result keyed on this user's key id.

        Returns:
          (int, int) tuple: (number of followers, number following)
        """
        if self.key.id() in PROTOCOL_DOMAINS:
            # we don't store Followers for protocol bot users any more, so
            # follower counts are inaccurate, so don't return them
            return (0, 0)

        # start both counts before waiting on either result
        followers_future = Follower.query(
            Follower.to == self.key, Follower.status == 'active').count_async()
        following_future = Follower.query(
            Follower.from_ == self.key, Follower.status == 'active').count_async()

        return followers_future.get_result(), following_future.get_result()
1✔
1100

1101
    def add_domain_blocklist(self, url):
        """Adds a domain blocklist to this user.

        Loads the CSV at the given URL and appends its key to :attr:`blocks`
        if it's not already there. Doesn't write this user to the datastore.

        Args:
          url (str): URL of CSV blocklist to add

        Returns:
          bool: True if added, False if the URL couldn't be loaded, None if
            it was already present
        """
        from web import Web

        candidate_key = Object(id=maybe_truncate_key_id(url)).key
        if candidate_key in self.blocks:
            return None

        blocklist = Web.load(url, csv=True)
        if not blocklist:
            return False

        self.blocks.append(blocklist.key)
        return True
1✔
1125

1126

1127
# WARNING: AddRemoveMixin *must* be before StringIdModel here so that its __init__
1128
# gets called! Due to an (arguable) ndb.Model bug:
1129
# https://github.com/googleapis/python-ndb/issues/1025
1130
class Object(AddRemoveMixin, StringIdModel):
1✔
1131
    """An activity or other object, eg actor.
1132

1133
    Key name is the id, generally a URI. We synthesize ids if necessary.
1134
    """
1135
    GET_ORIGINAL_FN = get_original_object_key
1✔
1136
    'used by AddRemoveMixin'
1✔
1137

1138
    users = ndb.KeyProperty(repeated=True)
1✔
1139
    'User(s) who created or otherwise own this object.'
1✔
1140

1141
    notify = ndb.KeyProperty(repeated=True)
1✔
1142
    """User who should see this in their user page, eg in reply to, reaction to,
1✔
1143
    share of, etc.
1144
    """
1145
    feed = ndb.KeyProperty(repeated=True)
1✔
1146
    'User who should see this in their feeds, eg followers of its creator'
1✔
1147

1148
    source_protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()))
1✔
1149
    """The protocol this object originally came from.
1✔
1150

1151
    TODO: nail down whether this is :attr:`ABBREV`` or :attr:`LABEL`
1152
    """
1153

1154
    # TODO: switch back to ndb.JsonProperty if/when they fix it for the web console
1155
    # https://github.com/googleapis/python-ndb/issues/874
1156
    as2 = JsonProperty()
1✔
1157
    'ActivityStreams 2, for ActivityPub'
1✔
1158
    bsky = JsonProperty()
1✔
1159
    'AT Protocol lexicon, for Bluesky'
1✔
1160
    csv = ndb.TextProperty()
1✔
1161
    'Other standalone CSV data, eg domain blocklist.'
1✔
1162
    mf2 = JsonProperty()
1✔
1163
    'HTML microformats2 item (*not* top level parse object with ``items`` field)'
1✔
1164
    nostr = JsonProperty()
1✔
1165
    'Nostr event'
1✔
1166
    our_as1 = JsonProperty()
1✔
1167
    'ActivityStreams 1, for activities that we generate or modify ourselves'
1✔
1168
    raw = JsonProperty()
1✔
1169
    'Other standalone data format, eg DID document'
1✔
1170

1171
    extra_as1 = JsonProperty()
1✔
1172
    "Additional individual fields to merge into this object's AS1 representation"
1✔
1173

1174
    # TODO: remove and actually delete Objects instead!
1175
    deleted = ndb.BooleanProperty()
1✔
1176
    ''
1✔
1177

1178
    copies = ndb.StructuredProperty(Target, repeated=True)
1✔
1179
    """Copies of this object elsewhere, eg at:// URIs for ATProto records and
1✔
1180
    nevent etc bech32-encoded Nostr ids, where this object is the original.
1181
    Similar to u-syndication links in microformats2 and
1182
    upstream/downstreamDuplicates in AS1.
1183
    """
1184

1185
    created = ndb.DateTimeProperty(auto_now_add=True)
1✔
1186
    ''
1✔
1187
    updated = ndb.DateTimeProperty(auto_now=True)
1✔
1188
    ''
1✔
1189

1190
    new = None
1✔
1191
    """True if this object is new, ie this is the first time we've seen it,
1✔
1192
    False otherwise, None if we don't know.
1193
    """
1194
    changed = None
1✔
1195
    """True if this object's contents have changed from our existing copy in the
1✔
1196
    datastore, False otherwise, None if we don't know. :class:`Object` is
1197
    new/changed. See :meth:`activity_changed()` for more details.
1198
    """
1199

1200
    # DEPRECATED
1201
    # These were for full feeds with multiple items, not just this one, so they were
1202
    # stored as audit records only, not used in to_as1. for Atom/RSS
1203
    # based Objects, our_as1 was populated with a feed_index top-level
1204
    # integer field that indexed into one of these.
1205
    #
1206
    # atom = ndb.TextProperty() # Atom XML
1207
    # rss = ndb.TextProperty()  # RSS XML
1208

1209
    # DEPRECATED; these were for delivery tracking, but they were too expensive,
1210
    # so we stopped: https://github.com/snarfed/bridgy-fed/issues/1501
1211
    #
1212
    # STATUSES = ('new', 'in progress', 'complete', 'failed', 'ignored')
1213
    # status = ndb.StringProperty(choices=STATUSES)
1214
    # delivered = ndb.StructuredProperty(Target, repeated=True)
1215
    # undelivered = ndb.StructuredProperty(Target, repeated=True)
1216
    # failed = ndb.StructuredProperty(Target, repeated=True)
1217

1218
    # DEPRECATED but still used read only to maintain backward compatibility
1219
    # with old Objects in the datastore that we haven't bothered migrating.
1220
    #
1221
    # domains = ndb.StringProperty(repeated=True)
1222

1223
    # DEPRECATED; replaced by :attr:`users`, :attr:`notify`, :attr:`feed`
1224
    #
1225
    # labels = ndb.StringProperty(repeated=True,
1226
    #                             choices=('activity', 'feed', 'notification', 'user'))
1227

1228
    @property
    def as1(self):
        """Returns this object's ActivityStreams 1 representation.

        Converts from the first of these properties that's populated, in this
        order: :attr:`our_as1`, :attr:`as2`, :attr:`bsky`, :attr:`mf2`,
        :attr:`nostr`. Then fills in ``id`` from the key if it's missing,
        prefixes ``image`` URLs for ids in ``IMAGE_PROXY_DOMAINS``, and merges
        in :attr:`extra_as1`.

        NOTE(review): in the ``our_as1`` branch, the returned dict is
        ``self.our_as1`` itself, not a copy, so the id and ``extra_as1``
        updates below modify it in place. Confirm that's intended.

        Returns:
          dict: AS1 object, or None if no data property is populated or the
            Bluesky conversion fails
        """
        def use_urls_as_ids(obj):
            """If id field is missing or not a URL, use the url field."""
            id = obj.get('id')
            if not id or not (util.is_web(id) or re.match(DOMAIN_RE, id)):
                if url := util.get_url(obj):
                    obj['id'] = url

            # recurse into nested actor/object fields
            for field in 'author', 'actor', 'object':
                if inner := as1.get_object(obj, field):
                    use_urls_as_ids(inner)

        if self.our_as1:
            obj = self.our_as1
            if self.source_protocol == 'web':
                use_urls_as_ids(obj)

        elif self.as2:
            obj = as2.to_as1(unwrap(self.as2))

        elif self.bsky:
            # the repo (owner) comes from this object's at:// URI key id
            owner, _, _ = parse_at_uri(self.key.id())
            ATProto = PROTOCOLS['atproto']
            handle = ATProto(id=owner).handle
            try:
                obj = bluesky.to_as1(self.bsky, repo_did=owner, repo_handle=handle,
                                     uri=self.key.id(), pds=ATProto.pds_for(self))
            except (ValueError, RequestException):
                # conversion failures are logged and reported as "no AS1"
                logger.info(f"Couldn't convert to ATProto", exc_info=True)
                return None

        elif self.mf2:
            obj = microformats2.json_to_object(self.mf2,
                                               rel_urls=self.mf2.get('rel-urls'))
            use_urls_as_ids(obj)

            # use fetched final URL as id, not u-url
            # https://github.com/snarfed/bridgy-fed/issues/829
            if url := self.mf2.get('url'):
                obj['id'] = (self.key.id() if self.key and '#' in self.key.id()
                             else url)

        elif self.nostr:
            obj = granary.nostr.to_as1(self.nostr)

        else:
            return None

        # populate id if necessary
        if self.key:
            obj.setdefault('id', self.key.id())

        if util.domain_or_parent_in(obj.get('id'), IMAGE_PROXY_DOMAINS):
           as1.prefix_urls(obj, 'image', IMAGE_PROXY_URL_BASE)

        # extra_as1 fields override anything from the conversion above
        if self.extra_as1:
            obj.update(self.extra_as1)

        return obj
1✔
1288

1289
    @ndb.ComputedProperty
    def type(self):  # AS1 objectType, or verb if it's an activity
        """Returns this object's AS1 type, or None if it has no AS1 data.

        Returns:
          str: AS1 ``objectType``, or ``verb`` if it's an activity
        """
        # evaluate the as1 property only once; it reconverts from the
        # underlying protocol data on every access
        obj_as1 = self.as1
        if obj_as1:
            return as1.object_type(obj_as1)
1✔
1293

1294
    def _expire(self):
1✔
1295
        """Automatically delete most Objects after a while using a TTL policy.
1296

1297
        https://cloud.google.com/datastore/docs/ttl
1298

1299
        They recommend not indexing TTL properties:
1300
        https://cloud.google.com/datastore/docs/ttl#ttl_properties_and_indexes
1301
        """
1302
        now = self.updated or util.now()
1✔
1303
        if self.deleted:
1✔
1304
            return now + timedelta(days=1)
1✔
1305
        elif self.type not in DONT_EXPIRE_OBJECT_TYPES:
1✔
1306
            return now + OBJECT_EXPIRE_AGE
1✔
1307

1308
    expire = ndb.ComputedProperty(_expire, indexed=False)
1✔
1309

1310
    def _pre_put_hook(self):
        """Cleans up and validates this object before it's stored.

        * Strip @context from as2 and its inner actor/object fields (we don't
          do LD) to save disk space
        * Validate that this object's id, and each copy's id, is owned by its
          protocol
        * Validate that Nostr ids are nostr:[hex] ids
        * Validate that at:// URIs have DIDs

        Raises:
          AssertionError: if an id isn't valid for its protocol
          ValueError: if an at:// URI id's repo isn't a DID
        """
        as2_obj = self.as2
        if as2_obj:
            as2_obj.pop('@context', None)
            for field in 'actor', 'attributedTo', 'author', 'object':
                for inner in util.get_list(as2_obj, field):
                    if isinstance(inner, dict):
                        inner.pop('@context', None)

        def validate(id, proto):
            if proto is None or proto == 'ui':
                return

            assert PROTOCOLS[proto].owns_id(id) is not False, \
                f'Protocol {PROTOCOLS[proto].LABEL} does not own id {id}'

            if proto == 'nostr':
                assert id.startswith('nostr:'), id
                assert granary.nostr.ID_RE.match(id.removeprefix('nostr:')), id
                return

            if proto != 'atproto':
                return

            assert id.startswith('at://') or id.startswith('did:'), id
            if not id.startswith('at://'):
                return

            repo, _, _ = parse_at_uri(id)
            if not repo.startswith('did:'):
                # TODO: if we hit this, that means the AppView gave us an AT
                # URI with a handle repo/authority instead of DID. that's
                # surprising! ...if so, and if we need to handle it, add a
                # new arroba.did.canonicalize_at_uri() function, then use it
                # here, or before.
                raise ValueError(f'at:// URI ids must have DID repos; got {id}')

        validate(self.key.id(), self.source_protocol)
        for target in self.copies:
            validate(target.uri, target.protocol)
1✔
1350

1351
    def _post_put_hook(self, future):
        """Logs this object's key at debug level after it's written.

        Args:
          future: unused here
        """
        # TODO: assert that as1 id is same as key id? in pre put hook?
        logger.debug(f'Wrote {self.key}')
1✔
1354

1355
    @classmethod
    def get_by_id(cls, id, authed_as=None, **kwargs):
        """Fetches the :class:`Object` with the given id, if it exists.

        The id is passed through ``maybe_truncate_key_id`` before the lookup.

        Args:
          id (str)
          authed_as (str): optional; if provided, and a matching :class:`Object`
            already exists and has AS1 data, this actor id must be one of the
            object's ``author`` or ``actor`` ids or ``id`` itself, either
            directly or as its profile id. Implements basic authorization for
            updates and deletes.
          kwargs: passed through to the superclass's ``get_by_id``

        Returns:
          Object: or None if it doesn't exist

        Raises:
          :class:`werkzeug.exceptions.Forbidden` if ``authed_as`` doesn't match
            the existing object
        """
        obj = super().get_by_id(maybe_truncate_key_id(id), **kwargs)

        if obj and obj.as1 and authed_as:
            # authorization: check that the authed user is allowed to modify
            # this object
            # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
            proto = PROTOCOLS.get(obj.source_protocol)
            assert proto, obj.source_protocol
            # owners are the object's authors and actors, plus the requested
            # id itself, all normalized as user ids in the object's protocol
            owners = [ids.normalize_user_id(id=owner, proto=proto)
                      for owner in (as1.get_ids(obj.as1, 'author')
                                    + as1.get_ids(obj.as1, 'actor'))
                                    + [id]]
            # accept authed_as if either its normalized user id or its profile
            # id is an owner
            if (ids.normalize_user_id(id=authed_as, proto=proto) not in owners
                    and ids.profile_id(id=authed_as, proto=proto) not in owners):
                report_error("Auth: Object: authed_as doesn't match owner",
                             user=f'{id} authed_as {authed_as} owners {owners}')
                error(f"authed user {authed_as} isn't object owner {owners}",
                      status=403)

        return obj
1✔
1392

1393
    @classmethod
    def get_or_create(cls, id, authed_as=None, **props):
        """Returns an :class:`Object` with the given property values.

        If a matching :class:`Object` doesn't exist in the datastore, creates it
        first with all of ``props``. For an existing object, merges the
        repeated ``copies``, ``feed``, ``notify``, and ``users`` properties,
        and overwrites other properties only with non-None values that differ
        from the stored ones. Also populates the :attr:`new` and
        :attr:`changed` properties.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for much, last writer wins
        is usually fine.

        Args:
          id (str): object id; passed through ``maybe_truncate_key_id``
          authed_as (str): optional; if provided, and a matching :class:`Object`
            already exists, its ``author`` or ``actor`` must contain this actor
            id. Implements basic authorization for updates and deletes.
            Required (asserted) if the existing object has AS1 data.
          props: property values to set

        Returns:
          Object:

        Raises:
          :class:`werkzeug.exceptions.Forbidden` if ``authed_as`` doesn't match
            the existing object
        """
        key_id = maybe_truncate_key_id(id)
        obj = cls.get_by_id(key_id, authed_as=authed_as)

        if not obj:
            # NOTE(review): this constructs Object directly, not cls
            obj = Object(id=key_id, **props)
            obj.new = True
            obj.changed = False
            obj.put()
            return obj

        if orig_as1 := obj.as1:
            # get_by_id() checks authorization if authed_as is set. make sure
            # it's always set for existing objects.
            assert authed_as

        # only write to the datastore below if something actually changed
        dirty = False
        for prop, val in props.items():
            assert not isinstance(getattr(Object, prop), ndb.ComputedProperty)
            if prop in ('copies', 'feed', 'notify', 'users'):
                # merge repeated fields
                for elem in val:
                    if obj.add(prop, elem):
                        dirty = True
            elif val is not None and val != getattr(obj, prop):
                setattr(obj, prop, val)
                # new protocol data replaces any existing our_as1, which as1
                # would otherwise prefer, unless the caller also provided a
                # new our_as1
                if (prop in ('as2', 'bsky', 'csv', 'mf2', 'nostr', 'raw')
                        and not props.get('our_as1')):
                    obj.our_as1 = None
                dirty = True

        obj.new = False
        obj.changed = obj.activity_changed(orig_as1)
        if dirty:
            obj.put()
        return obj
1✔
1452

1453
    @staticmethod
    def from_request():
        """Builds an :class:`Object` from the current request's form parameters.

        If ``obj_id`` is provided, fetches and returns that stored object.
        Otherwise constructs a new, unstored :class:`Object` from the other
        parameters, JSON-decoding the data properties. If no ``id`` was
        provided, the key comes from the AS1 ``id``, if any.

        Parameters:
          obj_id (str): id of :class:`models.Object` to handle
          *: If ``obj_id`` is unset, all other parameters are properties for a
            new :class:`models.Object` to handle
        """
        obj_id = request.form.get('obj_id')
        if obj_id:
            return Object.get_by_id(obj_id)

        props = {
            'id': request.form.get('id'),
            'source_protocol': request.form.get('source_protocol'),
        }

        for json_prop in 'as2', 'bsky', 'mf2', 'our_as1', 'nostr', 'raw':
            encoded = request.form.get(json_prop)
            if encoded:
                props[json_prop] = json_loads(encoded)

        obj = Object(**props)
        if not obj.key and obj.as1:
            as1_id = obj.as1.get('id')
            if as1_id:
                obj.key = ndb.Key(Object, as1_id)

        return obj
1✔
1478

1479
    def to_request(self):
        """Serializes this :class:`Object` into a query parameter dict.

        Includes each populated JSON data property, JSON-encoded with sorted
        keys, plus ``source_protocol`` and ``id`` if they're set.

        Returns:
          dict: maps str parameter name to str value
        """
        form = {
            json_prop: json_dumps(val, sort_keys=True)
            for json_prop in ('as2', 'bsky', 'mf2', 'nostr', 'our_as1', 'raw')
            if (val := getattr(self, json_prop, None))
        }

        source_protocol = self.source_protocol
        if source_protocol:
            form['source_protocol'] = source_protocol

        if self.key:
            form['id'] = self.key.id()

        return form
1✔
1495

1496
    def activity_changed(self, other_as1):
        """Returns True if this activity is meaningfully changed from ``other_as1``.

        ...otherwise False. If only one of the two has AS1 data, that counts
        as changed; if neither does, it doesn't.

        Used to populate :attr:`changed`.

        Args:
          other_as1 (dict): AS1 object, or none
        """
        mine = self.as1
        if mine and other_as1:
            # ignore inReplyTo since we translate it between protocols
            return as1.activity_changed(mine, other_as1, inReplyTo=False)

        return bool(mine) != bool(other_as1)
1510

1511
    def actor_link(self, image=True, sized=False, user=None):
        """Returns a pretty HTML link with the actor's name and picture.

        Uses this object's AS1 ``actor``, falling back to ``author``. If that
        actor is just an id, tries to load the full actor from the datastore
        via this object's source protocol.

        TODO: unify with :meth:`User.user_link`?

        NOTE(review): the actor's name and URLs are interpolated into the HTML
        as is, without escaping. Confirm that callers only render this where
        that's safe, or escape here.

        Args:
          image (bool): whether to include an ``img`` tag with the actor's picture
          sized (bool): whether to set an explicit (``width=32``) size on the
            profile picture ``img`` tag
          user (User): current user

        Returns:
          str: HTML snippet, or empty string if this object has no actor
        """
        attrs = {'class': 'h-card u-author'}

        if user and user.key in self.users:
            # outbound; show a nice link to the user
            return user.user_link(handle=False, pictures=True)

        proto = PROTOCOLS.get(self.source_protocol)

        actor = None
        if self.as1:
            actor = (as1.get_object(self.as1, 'actor')
                     or as1.get_object(self.as1, 'author'))
            # hydrate from datastore if available. guard on proto itself, not
            # just source_protocol, since that's what we call load on
            # TODO: optimize! this is called serially in loops, eg in home.html
            if set(actor.keys()) == {'id'} and proto:
                actor_obj = proto.load(actor['id'], remote=False)
                if actor_obj and actor_obj.as1:
                    actor = actor_obj.as1

        if not actor:
            return ''
        elif set(actor.keys()) == {'id'}:
            return common.pretty_link(actor['id'], attrs=attrs, user=user)

        url = as1.get_url(actor)
        name = actor.get('displayName') or actor.get('username') or ''
        img_url = util.get_url(actor, 'image')
        if not image or not img_url:
            return common.pretty_link(url, text=name, attrs=attrs, user=user)

        logo = ''
        if proto:
            # label the logo with the protocol's name, like User.user_link does.
            # (this used to be self.__class__.__name__, ie always "Object".)
            logo = f'<span class="logo" title="{proto.__name__}">{proto.LOGO_HTML or proto.LOGO_EMOJI}</span>'

        return f"""\
        {logo}
        <a class="h-card u-author" href="{url}" title="{name}">
          <img class="profile" src="{img_url}" {'width="32"' if sized else ''}/>
          <span style="unicode-bidi: isolate">{util.ellipsize(name, chars=40)}</span>
        </a>"""
1565

1566
    def get_copy(self, proto):
        """Looks up this object's id in the given protocol.

        If ``proto`` is this object's ``source_protocol``, that's this object's
        own key id. Otherwise, it's the URI of the first entry in
        :attr:`copies` for that protocol, or None if there isn't one.
        Protocols are matched by either ``LABEL`` or ``ABBREV``.

        TODO: for some protocols, we should try harder to find the *right* copy id.
        Eg if copies has some old garbage entries for this protocol, and we can
        tell that they don't belong to the user's copy account in this protocol, eg
        if the DID in the at:// URI doesn't match, we should skip those and look for
        the matching copy. We'd need the user here though.
        This would help with or fix:
        https://console.cloud.google.com/errors/detail/COK22a6w4O2JVg;locations=global;time=P30D?project=bridgy-federated

        Args:
          proto: :class:`Protocol` subclass

        Returns:
          str: id, or None if this object has no copy in ``proto``
        """
        labels = (proto.LABEL, proto.ABBREV)

        if self.source_protocol in labels:
            return self.key.id()

        return next(
            (target.uri for target in self.copies if target.protocol in labels),
            None)
1✔
1592

1593
    def resolve_ids(self):
        """Replaces "copy" ids, subdomain ids, etc with their originals.

        The end result is that all ids are original "source" ids, ie in the
        protocol that they first came from.

        Specifically, resolves:

        * ids in :class:`User.copies` and :class:`Object.copies`, eg ATProto
          records and Nostr events that we bridged, to the ids of their
          original objects in their source protocol, eg
          ``at://did:plc:abc/app.bsky.feed.post/123`` => ``https://mas.to/@user/456``.
        * Bridgy Fed subdomain URLs to the ids embedded inside them, eg
          ``https://bsky.brid.gy/ap/did:plc:xyz`` => ``did:plc:xyz``
        * ATProto bsky.app URLs to their DIDs or `at://` URIs, eg
          ``https://bsky.app/profile/a.com`` => ``did:plc:123``

        ...in these AS1 fields, in place:

        * ``id``
        * ``actor``
        * ``author``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``attachments.[objectType=note].id``
        * ``tags.[objectType=mention].url``

        Does nothing if this object has no AS1. Only unwraps subdomain URLs,
        without resolving copy ids, if :attr:`source_protocol` isn't a known
        protocol. Results are stored in :attr:`our_as1`.

        :meth:`protocol.Protocol.translate_ids` is partly the inverse of this.
        Much of the same logic is duplicated there!

        TODO: unify with :meth:`normalize_ids`, :meth:`Object.normalize_ids`.
        """
        if not self.as1:
            return

        # extract ids, strip Bridgy Fed subdomain URLs
        outer_obj = unwrap(self.as1)
        if outer_obj != self.as1:
            self.our_as1 = util.trim_nulls(outer_obj)

        self_proto = PROTOCOLS.get(self.source_protocol)
        if not self_proto:
            return

        logger.debug(f'Resolving ids for {self.key.id()}')
        inner_obj = outer_obj['object'] = as1.get_object(outer_obj)
        replaced = False

        def replace(val, orig_fn):
            """Returns ``val`` with its id swapped for the original's, if any.

            Args:
              val (str or dict): id, or AS1 object with an ``id`` field
              orig_fn (callable): takes a str id, returns the key of the
                original entity or None, eg :func:`get_original_object_key`
            """
            id = val.get('id') if isinstance(val, dict) else val
            if not id:
                # nothing to resolve. return the value itself, not its missing
                # id, so that embedded objects without ids aren't discarded
                # when another id in this activity gets resolved.
                return val
            elif not self_proto.HAS_COPIES:
                return id

            orig = orig_fn(id)
            if not orig:
                return val

            nonlocal replaced
            replaced = True
            logger.debug(f'Resolved copy id {val} to original {orig.id()}')

            # keep the dict if it has fields other than id, otherwise collapse
            # it to the bare id string
            if isinstance(val, dict) and util.trim_nulls(val).keys() > {'id'}:
                val['id'] = orig.id()
                return val
            else:
                return orig.id()

        # actually replace ids
        #
        # object field could be either object (eg repost) or actor (eg follow)
        outer_obj['object'] = replace(inner_obj, get_original_object_key)
        if not replaced:
            outer_obj['object'] = replace(inner_obj, get_original_user_key)

        for obj in outer_obj, inner_obj:
            for tag in as1.get_objects(obj, 'tags'):
                if tag.get('objectType') == 'mention':
                    tag['url'] = replace(tag.get('url'), get_original_user_key)
            for att in as1.get_objects(obj, 'attachments'):
                if att.get('objectType') == 'note':
                    att['id'] = replace(att.get('id'), get_original_object_key)
            for field, fn in (
                    ('actor', get_original_user_key),
                    ('author', get_original_user_key),
                    ('inReplyTo', get_original_object_key),
                ):
                obj[field] = [replace(val, fn) for val in util.get_list(obj, field)]
                if len(obj[field]) == 1:
                    obj[field] = obj[field][0]

        # only overwrite our_as1 if we actually resolved something
        if replaced:
            self.our_as1 = util.trim_nulls(outer_obj)
1✔
1688

1689
    def normalize_ids(self):
        """Rewrites ids into their protocol's canonical form, where one exists.

        For example, normalizes ATProto ``https://bsky.app/...`` URLs to DIDs
        for profiles, ``at://`` URIs for posts.

        Covers mention tag URLs, ``actor``, ``author``, and ``inReplyTo`` in the
        outer activity and each inner object, plus the inner objects' own ids.
        Works on a deep copy of :attr:`as1` and only stores it, in
        :attr:`our_as1`, if at least one id actually changed.

        TODO: unify with :meth:`resolve_ids`, :meth:`Protocol.translate_ids`.
        """
        from protocol import Protocol

        if not self.as1:
            return

        logger.debug(f'Normalizing ids for {self.key.id()}')
        outer = copy.deepcopy(self.as1)
        inners = as1.get_objects(outer)
        changed = False

        def normalized(value, translate):
            """Returns ``value`` with its id translated within its own protocol.

            Args:
              value (str or dict): id, or AS1 object with an ``id`` field
              translate (callable): :func:`ids.translate_user_id` or
                :func:`ids.translate_object_id`
            """
            nonlocal changed

            is_dict = isinstance(value, dict)
            current = value.get('id') if is_dict else value
            if not current:
                return value

            proto = Protocol.for_id(current, remote=False)
            if not proto:
                return value

            new_id = translate(id=current, from_=proto, to=proto)
            if not new_id or new_id == current:
                return value

            changed = True
            if not is_dict:
                return new_id

            value['id'] = new_id
            return value

        # actually replace ids
        for obj in [outer, *inners]:
            for tag in as1.get_objects(obj, 'tags'):
                if tag.get('objectType') == 'mention':
                    tag['url'] = normalized(tag.get('url'), ids.translate_user_id)

            for field in ('actor', 'author', 'inReplyTo'):
                if field == 'inReplyTo':
                    translate = ids.translate_object_id
                else:
                    translate = ids.translate_user_id

                vals = [normalized(val, translate)
                        for val in util.get_list(obj, field)]
                # single values are stored bare, not as one-element lists
                obj[field] = vals[0] if len(vals) == 1 else vals

        collected = outer['object'] = []
        for inner in inners:
            # inner objects are users if they're actors themselves or if the
            # outer verb takes an actor as its object
            is_user = (as1.object_type(inner) in as1.ACTOR_TYPES
                       or as1.object_type(outer) in as1.VERBS_WITH_ACTOR_OBJECT)
            translate = (ids.translate_user_id if is_user
                         else ids.translate_object_id)

            result = normalized(inner, translate)
            # collapse objects that only have an id down to the bare id string
            if isinstance(result, dict) and util.trim_nulls(result).keys() == {'id'}:
                result = result['id']

            collected.append(result)

        if len(collected) == 1:
            outer['object'] = collected[0]

        if changed:
            self.our_as1 = util.trim_nulls(outer)
1✔
1762

1763
    @cached_property
    def domain_blocklist(self):
        """Returns the domains in the domain blocklist in :attr:`csv`.

        Extracts the 'domain' or '#domain' column and returns its values as a
        list. Empty values and values in ``DOMAIN_BLOCKLIST_CANARIES`` are
        skipped.

        Returns:
          list of str: domain names, or empty list if :attr:`csv` isn't
            populated, or can't be parsed, or has neither of those columns.
        """
        if not self.csv:
            return []

        # csv.DictReader is lazy: the header row is only parsed on first access
        # to .fieldnames, and data rows only while iterating. csv.Error can
        # therefore be raised by any of the steps below, not by the
        # constructor, so they all need to be inside the try.
        try:
            reader = csv.DictReader(io.StringIO(self.csv))
            fieldnames = reader.fieldnames or ()

            if 'domain' in fieldnames:
                col = 'domain'
            elif '#domain' in fieldnames:
                col = '#domain'
            else:
                return []

            # short rows get None for missing columns, hence the truthiness check
            return [row[col] for row in reader
                    if row[col] and row[col] not in DOMAIN_BLOCKLIST_CANARIES]
        except csv.Error as e:
            logger.warning(f"Couldn't parse domain blocklist CSV: {e}")
            return []
1791

1792
class Follower(ndb.Model):
    """A follow relationship between two users in different protocols."""
    STATUSES = ('active', 'inactive')

    from_ = ndb.KeyProperty(name='from', required=True)
    """The follower."""
    to = ndb.KeyProperty(required=True)
    """The followee, ie the user being followed."""

    follow = ndb.KeyProperty(Object)
    """The last follow activity."""
    status = ndb.StringProperty(choices=STATUSES, default='active')
    """Whether this follow is active or not."""

    created = ndb.DateTimeProperty(auto_now_add=True)
    updated = ndb.DateTimeProperty(auto_now=True)

    # OLD. some stored entities still have these; do not reuse.
    # src = ndb.StringProperty()
    # dest = ndb.StringProperty()
    # last_follow = JsonProperty()

    def _pre_put_hook(self):
        """Checks that the follower and followee are different kinds."""
        # we're a bridge! stick with bridging.
        assert self.from_.kind() != self.to.kind(), f'from {self.from_} to {self.to}'

    def _post_put_hook(self, future):
        """Logs this entity's key after it's written."""
        logger.debug(f'Wrote {self.key}')

    @classmethod
    def get_or_create(cls, *, from_, to, **kwargs):
        """Loads the Follower from ``from_`` to ``to``, creating it if necessary.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for much, last writer wins
        is usually fine.

        If a matching :class:`Follower` already exists and ``kwargs`` is
        provided, its properties are updated from ``kwargs`` and it's stored
        again.

        Args:
          from_ (User)
          to (User)
          kwargs: property values to set on the new or existing entity

        Returns:
          Follower:
        """
        assert from_
        assert to

        follower = Follower.query(
            Follower.from_ == from_.key,
            Follower.to == to.key,
        ).get()

        if follower:
            if kwargs:
                # update existing entity with new property values, eg to make
                # an inactive Follower active again
                for name, value in kwargs.items():
                    setattr(follower, name, value)
                follower.put()
            return follower

        follower = Follower(from_=from_.key, to=to.key, **kwargs)
        follower.put()
        return follower

    @staticmethod
    def fetch_page(collection, user):
        r"""Fetches a page of active :class:`Follower`\s for a given user.

        Wraps :func:`fetch_page`. Paging uses the ``before`` and ``after`` query
        parameters, if available in the request.

        Followers whose other user can't be loaded are dropped. For the
        ``followers`` collection, so are followers that ``user`` isn't enabled
        for.

        Args:
          collection (str): ``followers`` or ``following``
          user (User)

        Returns:
          (list of Follower, str, str) tuple: results, annotated with an extra
          ``user`` attribute that holds the follower or following :class:`User`,
          and new str query param values for ``before`` and ``after`` to fetch
          the previous and next pages, respectively
        """
        assert collection in ('followers', 'following'), collection
        is_followers = collection == 'followers'

        # for followers, user is the followee; for following, the follower
        filter_prop = Follower.to if is_followers else Follower.from_
        query = Follower.query(
            Follower.status == 'active',
            filter_prop == user.key,
        )

        followers, before, after = fetch_page(query, Follower, by=Follower.updated)

        # load the user on the other side of each follow
        users = ndb.get_multi(f.from_ if is_followers else f.to
                              for f in followers)
        User.load_multi(u for u in users if u)

        loaded = []
        for follower, other in zip(followers, users):
            follower.user = other
            if other:
                loaded.append(follower)

        # only show followers in protocols that this user is bridged into
        if is_followers:
            loaded = [f for f in loaded if user.is_enabled(f.user)]

        return loaded, before, after
1✔
1897

1898

1899
def fetch_objects(query, by=None, user=None):
    """Fetches a page of :class:`Object` entities from a datastore query.

    Wraps :func:`fetch_page` and adds attributes to the returned
    :class:`Object` entities for rendering in ``objects.html``: ``phrase``,
    ``content``, ``url``, and, for some objects, ``inner_url``. Non-public and
    deleted objects are omitted.

    Args:
      query (ndb.Query)
      by (ndb.model.Property): either :attr:`Object.updated` or
        :attr:`Object.created`
      user (User): current user

    Returns:
      (list of Object, str, str) tuple:
      (results, new ``before`` query param, new ``after`` query param)
      to fetch the previous and next pages, respectively
    """
    assert by is Object.updated or by is Object.created
    objects, new_before, new_after = fetch_page(query, Object, by=by)
    objects = [o for o in objects if as1.is_public(o.as1) and not o.deleted]

    # AS1 verb => human-readable phrase. same for every object, so build it
    # once up front instead of once per object.
    phrases = {
        'accept': 'accepted',
        'article': 'posted',
        'comment': 'replied',
        'delete': 'deleted',
        'follow': 'followed',
        'invite': 'is invited to',
        'issue': 'filed issue',
        'like': 'liked',
        'note': 'posted',
        'post': 'posted',
        'repost': 'reposted',
        'rsvp-interested': 'is interested in',
        'rsvp-maybe': 'might attend',
        'rsvp-no': 'is not attending',
        'rsvp-yes': 'is attending',
        'share': 'reposted',
        'stop-following': 'unfollowed',
        'undo': 'undid',
        'update': 'updated',
    }
    phrases.update({actor_type: 'profile refreshed:'
                    for actor_type in as1.ACTOR_TYPES})

    # synthesize human-friendly content for objects
    for obj in objects:
        obj_as1 = obj.as1
        obj_type = as1.object_type(obj_as1)

        obj.phrase = phrases.get(obj_type, '')

        content = (obj_as1.get('content')
                   or obj_as1.get('displayName')
                   or obj_as1.get('summary'))
        if content:
            content = util.parse_html(content).get_text()

        urls = as1.object_urls(obj_as1)
        url = urls[0] if urls else None
        if url and not content:
            # heuristics for sniffing URLs and converting them to more friendly
            # phrases and user handles.
            # TODO: standardize this into granary.as2 somewhere?
            from activitypub import FEDI_URL_RE
            from atproto import COLLECTION_TO_TYPE, did_to_handle

            handle = suffix = ''
            if match := FEDI_URL_RE.match(url):
                handle = match.group(2)
                if match.group(4):
                    suffix = "'s post"
            elif match := BSKY_APP_URL_RE.match(url):
                handle = match.group('id')
                if match.group('tid'):
                    suffix = "'s post"
            elif match := AT_URI_RE.match(url):
                handle = match.group('repo')
                if coll := match.group('collection'):
                    suffix = f"'s {COLLECTION_TO_TYPE.get(coll) or 'post'}"
                url = bluesky.at_uri_to_web_url(url)
            elif url.startswith('did:'):
                handle = url
                url = bluesky.Bluesky.user_url(handle)

            if handle:
                if handle.startswith('did:'):
                    handle = did_to_handle(handle) or handle
                content = f'@{handle}{suffix}'

            if url:
                content = common.pretty_link(url, text=content, user=user)

        obj.content = (obj_as1.get('content')
                       or obj_as1.get('displayName')
                       or obj_as1.get('summary'))
        obj.url = as1.get_url(obj_as1)

        # activities that point at something else, or that have no content of
        # their own, are rendered with a link to their inner object
        if obj_type in ('like', 'follow', 'repost', 'share') or not obj.content:
            inner_as1 = as1.get_object(obj_as1)
            obj.inner_url = as1.get_url(inner_as1) or inner_as1.get('id')
            if obj.url:
                obj.phrase = common.pretty_link(
                    obj.url, text=obj.phrase, attrs={'class': 'u-url'}, user=user)
            if content:
                obj.content = content
                obj.url = url
            elif obj.inner_url:
                obj.content = common.pretty_link(obj.inner_url, max_length=50)

    return objects, new_before, new_after
1✔
2010

2011

2012
def hydrate(activity, fields=('author', 'actor', 'object')):
    """Starts loading full AS1 objects for bare-id fields in an AS1 activity.

    For each field in ``fields`` whose value is only an id, kicks off an async
    datastore lookup for the :class:`Object` with that id. When a lookup
    finishes and found an object with AS1, that AS1 replaces the field's value
    in ``activity``, in place. Fields with more than just an id are left alone.

    Args:
      activity (dict): AS1 activity
      fields (sequence of str): names of fields to hydrate

    Returns:
      sequence of :class:`google.cloud.ndb.tasklets.Future`: tasklets for hydrating
        each field. Wait on these before using ``activity``.
    """
    def setter_for(field):
        """Returns a done callback that stores the loaded AS1 in ``field``."""
        def on_done(future):
            loaded = future.result()
            if loaded and loaded.as1:
                activity[field] = loaded.as1
        return on_done

    pending = []

    for field in fields:
        stub = as1.get_object(activity, field)
        if not stub or not stub.keys() <= {'id'}:
            continue

        # TODO: extract a Protocol class method out of User.profile_id,
        # then use that here instead. the catch is that we'd need to
        # determine Protocol for every id, which is expensive.
        #
        # same TODO is in models.fetch_objects
        key_id = stub['id']
        if key_id.startswith('did:'):
            key_id = f'at://{key_id}/app.bsky.actor.profile/self'

        lookup = Object.get_by_id_async(key_id)
        lookup.add_done_callback(setter_for(field))
        pending.append(lookup)

    return pending
1✔
2050

2051

2052
def fetch_page(query, model_class, by=None):
    """Fetches a page of results from a datastore query.

    Uses the ``before`` and ``after`` query params (if provided; should be
    ISO8601 timestamps) and the ``by`` property to identify the page to fetch.
    Timezone-aware timestamps are converted to naive UTC before they're used
    in the query. At most ``PAGE_SIZE`` results are returned.

    Args:
      query (google.cloud.ndb.query.Query)
      model_class (class): currently unused
      by (ndb.model.Property): paging property, eg :attr:`Object.updated`
        or :attr:`Object.created`

    Returns:
      (list of Object or Follower, str, str) tuple: (results, new_before,
      new_after), where new_before and new_after are query param values for
      ``before`` and ``after`` to fetch the previous and next pages,
      respectively

    Raises:
      whatever :func:`oauth_dropins.webutil.flask_util.error` raises, if
      ``before`` or ``after`` can't be parsed or if both are provided
    """
    assert by

    # if there's a paging param ('before' or 'after'), update query with it
    # TODO: unify this with Bridgy's user page
    def get_paging_param(param):
        """Returns the given query param as a naive UTC datetime, or None."""
        val = request.values.get(param)
        if not val:
            return None

        try:
            # presumably restores a timezone offset's + that URL decoding
            # turned into a space
            dt = util.parse_iso8601(val.replace(' ', '+'))
        except Exception as e:
            # Exception, not BaseException, so that KeyboardInterrupt,
            # SystemExit, etc still propagate. error() is expected to raise;
            # dt would be unbound below otherwise.
            error(f"Couldn't parse {param}, {val!r} as ISO8601: {e}")

        if dt.tzinfo:
            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
        return dt

    before = get_paging_param('before')
    after = get_paging_param('after')
    if before and after:
        error("can't handle both before and after")
    elif after:
        query = query.filter(by >= after).order(by)
    elif before:
        query = query.filter(by < before).order(-by)
    else:
        query = query.order(-by)

    # NOTE(review): results are sorted by, and the new paging params below are
    # taken from, updated, even when by is a different property, eg
    # Object.created. confirm whether that's intended.
    query_iter = query.iter()
    results = sorted(itertools.islice(query_iter, 0, PAGE_SIZE),
                     key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    has_next = results and query_iter.probably_has_next()
    new_after = (
        before if before
        else results[0].updated if has_next and after
        else None)
    if new_after:
        new_after = new_after.isoformat()

    new_before = (
        after if after else
        results[-1].updated if has_next
        else None)
    if new_before:
        new_before = new_before.isoformat()

    return results, new_before, new_after
1✔
2120

2121

2122
def maybe_truncate_key_id(id):
    """Returns id, truncated to ``_MAX_KEYPART_BYTES`` if it's longer.

    Length is measured in UTF-8 encoded bytes, not characters, so ids with
    non-ASCII characters are truncated correctly too. Truncation never splits
    a multi-byte character, so the result may be slightly shorter than
    ``_MAX_KEYPART_BYTES`` bytes.

    Args:
      id (str)

    Returns:
      str: ``id`` itself if it fits, otherwise its longest prefix that does
    """
    # surrogatepass so that ids with lone surrogates, eg from JSON escapes,
    # round trip instead of raising
    encoded = id.encode('utf-8', errors='surrogatepass')
    if len(encoded) <= _MAX_KEYPART_BYTES:
        return id

    # if the first byte we'd drop is a UTF-8 continuation byte (0b10xxxxxx),
    # the cut is in the middle of a character; back up to that character's
    # first byte
    cut = _MAX_KEYPART_BYTES
    while cut > 0 and (encoded[cut] & 0xC0) == 0x80:
        cut -= 1

    truncated = encoded[:cut].decode('utf-8', errors='surrogatepass')
    logger.warning(f'Truncating id {id} to {_MAX_KEYPART_BYTES} bytes: {truncated}')
    return truncated
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc