• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / 6f815cb4-4d7d-487e-a07a-218399f88a89

14 Jan 2025 12:19AM UTC coverage: 92.752%. Remained the same
6f815cb4-4d7d-487e-a07a-218399f88a89

push

circleci

snarfed
updates for ndb context caching everything: Object.new/changed logic, tests

for #1149, 18aa302da

14 of 14 new or added lines in 2 files covered. (100.0%)

39 existing lines in 3 files now uncovered.

4479 of 4829 relevant lines covered (92.75%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.64
/models.py
1
"""Datastore model classes."""
2
import copy
1✔
3
from datetime import timedelta, timezone
1✔
4
from functools import lru_cache
1✔
5
import itertools
1✔
6
import json
1✔
7
import logging
1✔
8
import random
1✔
9
import re
1✔
10
from threading import Lock
1✔
11
from urllib.parse import quote, urlparse
1✔
12

13
from arroba.util import parse_at_uri
1✔
14
import cachetools
1✔
15
from Crypto.PublicKey import RSA
1✔
16
from flask import request
1✔
17
from google.cloud import ndb
1✔
18
from granary import as1, as2, atom, bluesky, microformats2
1✔
19
from granary.bluesky import AT_URI_PATTERN, BSKY_APP_URL_RE
1✔
20
from granary.source import html_to_text
1✔
21
from oauth_dropins.webutil import util
1✔
22
from oauth_dropins.webutil.appengine_info import DEBUG
1✔
23
from oauth_dropins.webutil.flask_util import error
1✔
24
from oauth_dropins.webutil.models import JsonProperty, StringIdModel
1✔
25
from oauth_dropins.webutil.util import ellipsize, json_dumps, json_loads
1✔
26
from requests import RequestException
1✔
27

28
import common
1✔
29
from common import (
1✔
30
    base64_to_long,
31
    DOMAIN_RE,
32
    long_to_base64,
33
    OLD_ACCOUNT_AGE,
34
    PROTOCOL_DOMAINS,
35
    report_error,
36
    unwrap,
37
)
38
import ids
1✔
39
import memcache
1✔
40

41
# Maps each protocol label string to its Protocol subclass. The keys are fixed
# here; the values start as None and are filled in by ProtocolUserMeta.
# (We used to wait for ProtocolUserMeta to populate the keys as well, but that
# was awkward to use in datastore model properties with choices, below; it
# required overriding them in reset_model_properties, which was always flaky.)
PROTOCOLS = dict.fromkeys((
    'activitypub',
    'ap',
    'atproto',
    'bsky',
    'ostatus',
    'web',
    'webmention',
    'ui',
))
if DEBUG:
    # extra labels that are only registered in DEBUG mode
    PROTOCOLS.update(dict.fromkeys((
        'fa',
        'fake',
        'efake',
        'other',
    )))

# Maps string datastore kind (eg 'MagicKey') to Protocol subclass.
# Populated in ProtocolUserMeta.
PROTOCOLS_BY_KIND = {}


# 2048 bits makes tests slow, so use 1024 for them
KEY_BITS = 1024 if DEBUG else 2048
PAGE_SIZE = 20

# auto delete most old objects via the Object.expire property
# https://cloud.google.com/datastore/docs/ttl
DONT_EXPIRE_OBJECT_TYPES = (
    as1.ACTOR_TYPES | as1.POST_TYPES | {'event', 'question'}
)
OBJECT_EXPIRE_AGE = timedelta(days=90)

GET_ORIGINALS_CACHE_EXPIRATION = timedelta(days=1)
FOLLOWERS_CACHE_EXPIRATION = timedelta(hours=2)

logger = logging.getLogger(__name__)
82

83

84
class Target(ndb.Model):
    r""":class:`protocol.Protocol` + URI pairs for identifying objects.

    These are currently used for:

    * delivery destinations, eg ActivityPub inboxes, webmention targets, etc.
    * copies of :class:`Object`\s and :class:`User`\s elsewhere,
      eg ``at://`` URIs for ATProto records, nevent etc bech32-encoded Nostr ids,
      ATProto user DIDs, etc.

    Used in :class:`google.cloud.ndb.model.StructuredProperty`\s inside
    :class:`Object` and :class:`User`; not stored as top-level entities in the
    datastore.

    ndb implements this by hoisting each property here into a corresponding
    property on the parent entity, prefixed by the StructuredProperty name
    below, eg ``delivered.uri``, ``delivered.protocol``, etc.

    For repeated StructuredPropertys, the hoisted properties are all repeated on
    the parent entity, and reconstructed into StructuredPropertys based on their
    order.

    https://googleapis.dev/python/python-ndb/latest/model.html#google.cloud.ndb.model.StructuredProperty
    """
    uri = ndb.StringProperty(required=True)
    protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()), required=True)

    def __eq__(self, other):
        """Equality excludes Targets' :class:`Key`.

        Two targets are equal when both ``uri`` and ``protocol`` match.
        Returns ``NotImplemented`` for operands that aren't :class:`Target`\\s,
        so comparing against ``None`` or an unrelated object evaluates to
        ``False`` instead of raising :class:`AttributeError`.
        """
        if not isinstance(other, Target):
            return NotImplemented

        return self.uri == other.uri and self.protocol == other.protocol

    def __hash__(self):
        """Allow hashing so these can be dict keys.

        Hashes the same fields that :meth:`__eq__` compares.
        """
        return hash((self.protocol, self.uri))
118

119

120
class DM(ndb.Model):
    """:class:`protocol.Protocol` + type pairs for identifying sent DMs.

    Used in :attr:`User.sent_dms`.

    https://googleapis.dev/python/python-ndb/latest/model.html#google.cloud.ndb.model.StructuredProperty
    """
    # the kinds of DM that can be recorded
    TYPES = (
        'request_bridging',
        'replied_to_bridged_user',
        'welcome',
    )
    type = ndb.StringProperty(choices=TYPES, required=True)
    protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()), required=True)

    def __eq__(self, other):
        """Equality excludes DMs' :class:`Key`.

        Two DMs are equal when both ``type`` and ``protocol`` match. Returns
        ``NotImplemented`` for operands that aren't :class:`DM`\\s, so
        comparing against ``None`` or an unrelated object evaluates to
        ``False`` instead of raising :class:`AttributeError`.
        """
        if not isinstance(other, DM):
            return NotImplemented

        return self.type == other.type and self.protocol == other.protocol
138

139

140
class ProtocolUserMeta(type(ndb.Model)):
    """:class:`User` metaclass that registers every class it creates.

    Each new class is stored in ``PROTOCOLS_BY_KIND`` under its datastore
    kind. If it also defines a ``LABEL`` other than ``protocol`` or ``user``,
    it is stored in ``PROTOCOLS`` under its ``LABEL``, its ``ABBREV``, and each
    of its ``OTHER_LABELS``, skipping empty values.
    """
    def __new__(meta, name, bases, class_dict):
        cls = super().__new__(meta, name, bases, class_dict)

        unregistered = ('protocol', 'user')
        if hasattr(cls, 'LABEL') and cls.LABEL not in unregistered:
            labels = (cls.LABEL, cls.ABBREV) + cls.OTHER_LABELS
            PROTOCOLS.update({label: cls for label in labels if label})

        kind = cls._get_kind()
        PROTOCOLS_BY_KIND[kind] = cls

        return cls
153

154

155
def reset_protocol_properties():
    """Rebuilds module-level values that are derived from ``PROTOCOLS``.

    Specifically:

    * ``common.SUBDOMAIN_BASE_URL_RE``, which matches URLs on any protocol
      label's ``brid.gy`` subdomain (or ``fed``), or on localhost
    * ``ids.COPIES_PROTOCOLS``, the labels whose registered protocol class has
      ``HAS_COPIES`` set
    """
    labels = '|'.join(PROTOCOLS.keys())
    abbrevs = f'({labels}|fed)'
    pattern = rf'^https?://({abbrevs}\.brid\.gy|localhost(:8080)?)/(convert/|r/)?({abbrevs}/)?(?P<path>.+)'
    common.SUBDOMAIN_BASE_URL_RE = re.compile(pattern)

    with_copies = []
    for label, proto in PROTOCOLS.items():
        # labels that haven't been registered yet still map to None
        if proto and proto.HAS_COPIES:
            with_copies.append(label)

    ids.COPIES_PROTOCOLS = tuple(with_copies)
162

163

164
class User(StringIdModel, metaclass=ProtocolUserMeta):
1✔
165
    """Abstract base class for a Bridgy Fed user.
166

167
    Stores some protocols' keypairs. Currently:
168

169
    * RSA keypair for ActivityPub HTTP Signatures
170
      properties: ``mod``, ``public_exponent``, ``private_exponent``, all
171
      encoded as base64url (ie URL-safe base64) strings as described in RFC
172
      4648 and section 5.1 of the Magic Signatures spec:
173
      https://tools.ietf.org/html/draft-cavage-http-signatures-12
174
    * *Not* K-256 signing or rotation keys for AT Protocol, those are stored in
175
      :class:`arroba.datastore_storage.AtpRepo` entities
176
    """
177
    obj_key = ndb.KeyProperty(kind='Object')  # user profile
1✔
178
    mod = ndb.StringProperty()
1✔
179
    use_instead = ndb.KeyProperty()
1✔
180

181
    # Proxy copies of this user elsewhere, eg DIDs for ATProto records, bech32
182
    # npub Nostr ids, etc. Similar to rel-me links in microformats2, alsoKnownAs
183
    # in DID docs (and now AS2), etc.
184
    # TODO: switch to using Object.copies on the user profile object?
185
    copies = ndb.StructuredProperty(Target, repeated=True)
1✔
186

187
    # these are for ActivityPub HTTP Signatures
188
    public_exponent = ndb.StringProperty()
1✔
189
    private_exponent = ndb.StringProperty()
1✔
190

191
    # set to True for users who asked me to be opted out instead of putting
192
    # #nobridge in their profile
193
    manual_opt_out = ndb.BooleanProperty()
1✔
194

195
    # protocols that this user has explicitly opted into. protocols that don't
196
    # require explicit opt in are omitted here. choices are the keys of
197
    # PROTOCOLS, which are fixed at import time.
198
    enabled_protocols = ndb.StringProperty(repeated=True, choices=list(PROTOCOLS.keys()))
1✔
199

200
    # DMs that we've attempted to send to this user
201
    sent_dms = ndb.StructuredProperty(DM, repeated=True)
1✔
202

203
    created = ndb.DateTimeProperty(auto_now_add=True)
1✔
204
    updated = ndb.DateTimeProperty(auto_now=True)
1✔
205

206
    # `existing` attr is set by get_or_create
207

208
    # OLD. some stored entities still have these; do not reuse.
209
    # direct = ndb.BooleanProperty(default=False)
210
    # actor_as2 = JsonProperty()
211
    # protocol-specific state
212
    # atproto_notifs_indexed_at = ndb.TextProperty()
213
    # atproto_feed_indexed_at = ndb.TextProperty()
214

215
    def __init__(self, **kwargs):
        """Constructor.

        Pulls ``obj`` out of ``kwargs`` and assigns it through the
        :attr:`obj` setter afterward, because however
        :class:`google.cloud.ndb.model.Model` sets it doesn't work with
        ``@property`` and ``@obj.setter`` below.

        Also creates the per-instance ``lock`` used by :meth:`add`.

        Args:
          kwargs: passed through to the superclass constructor, minus ``obj``
        """
        profile = kwargs.pop('obj', None)
        super().__init__(**kwargs)

        if profile:
            self.obj = profile

        self.lock = Lock()
229

230
    @classmethod
    def new(cls, **kwargs):
        """Always raises, to try to prevent instantiation. Use subclasses instead.

        Raises:
          NotImplementedError: always
        """
        raise NotImplementedError()
234

235
    def _post_put_hook(self, future):
        """Logs this entity's key at debug level. ``future`` is unused."""
        logger.debug(f'Wrote {self.key}')
237

238
    def add(self, prop, val):
        """Adds a value to a multiply-valued property. Uses ``self.lock``.

        When ``prop`` is ``copies``, also stores this user's key in memcache
        under the memoize key for :func:`get_original_user_key` and the new
        copy's URI.

        Args:
          prop (str): name of the property to add to
          val: value to add
        """
        with self.lock:
            values = getattr(self, prop)
            util.add(values, val)

        if prop != 'copies':
            return

        cache_key = memcache.memoize_key(get_original_user_key, val.uri)
        memcache.pickle_memcache.set(cache_key, self.key)
251

252
    @classmethod
    def get_by_id(cls, id, allow_opt_out=False, **kwargs):
        """Override to follow ``use_instead`` property and ``opt-out`` status.

        Args:
          id (str): user id
          allow_opt_out (bool): whether to return the user even if their
            :attr:`status` is set
          kwargs: passed through to ``_get_by_id``

        Returns:
          User: or None if the user doesn't exist, or if their :attr:`status`
          is set and ``allow_opt_out`` is False
        """
        user = cls._get_by_id(id, **kwargs)

        replacement = user.use_instead if user else None
        if replacement:
            logger.info(f'{user.key} use_instead => {user.use_instead}')
            user = replacement.get()

        if not user:
            return None

        if user.status and not allow_opt_out:
            logger.info(f'{user.key} is {user.status}')
            return None

        return user
271

272
    @classmethod
    def get_or_create(cls, id, propagate=False, allow_opt_out=False,
                      reload=False, **kwargs):
        """Loads and returns a :class:`User`. Creates it if necessary.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for anything, last
        writer wins is pretty much always fine.

        Sets the ``existing`` attribute on the returned user: True if it was
        loaded via :meth:`get_by_id`, False otherwise.

        Must be called on a subclass, not on :class:`User` itself.

        Args:
          id (str): user id
          propagate (bool): whether to create copies of this user in push-based
            protocols, eg ATProto and Nostr.
          allow_opt_out (bool): whether to allow and create the user if they're
            currently opted out
          reload (bool): whether to reload profile always, vs only if necessary
          kwargs: passed through to ``cls`` constructor

        Returns:
          User: existing or new user, or None if the user's :attr:`status` is
          set and ``allow_opt_out`` is False
        """
        assert cls != User

        # load with allow_opt_out=True so that we can distinguish "doesn't
        # exist" from "exists but has a status"; status is checked below
        user = cls.get_by_id(id, allow_opt_out=True)
        if user:
            if reload:
                user.reload_profile(gateway=True, raise_=False)

            if user.status and not allow_opt_out:
                return None
            user.existing = True

            # TODO: propagate more fields?
            # only fill in obj/obj_key from kwargs if they're currently unset
            changed = False
            for field in ['obj', 'obj_key']:
                old_val = getattr(user, field, None)
                new_val = kwargs.get(field)
                if old_val is None and new_val is not None:
                    setattr(user, field, new_val)
                    changed = True

            # merge in, never remove, enabled protocols
            if enabled_protocols := kwargs.get('enabled_protocols'):
                user.enabled_protocols = (set(user.enabled_protocols)
                                          | set(enabled_protocols))
                changed = True

            if not propagate:
                if changed:
                    user.put()
                return user

        else:
            # presumably this id is a copy of another, original user; if so,
            # return that user instead of creating a new one
            if orig_key := get_original_user_key(id):
                orig = orig_key.get()
                if orig.status and not allow_opt_out:
                    return None
                # NOTE(review): existing is False here even though orig was
                # loaded from the datastore - confirm this is intended
                orig.existing = False
                return orig

            user = cls(id=id, **kwargs)
            user.existing = False
            user.reload_profile(gateway=True, raise_=False)
            if user.status and not allow_opt_out:
                return None

        if propagate and not user.status:
            # iterates over a new list, so removing from
            # user.enabled_protocols inside the loop is safe
            for label in user.enabled_protocols + list(user.DEFAULT_ENABLED_PROTOCOLS):
                proto = PROTOCOLS[label]
                if proto == cls:
                    continue
                elif proto.HAS_COPIES:
                    if not user.get_copy(proto) and user.is_enabled(proto):
                        try:
                            proto.create_for(user)
                        except (ValueError, AssertionError):
                            # best effort: log and un-enable the protocol
                            logger.info(f'failed creating {proto.LABEL} copy',
                                        exc_info=True)
                            util.remove(user.enabled_protocols, proto.LABEL)
                    else:
                        logger.debug(f'{proto.LABEL} not enabled or user copy already exists, skipping propagate')

        # generate keys for all protocols _except_ our own
        #
        # these can use urandom() and do nontrivial math, so they can take time
        # depending on the amount of randomness available and compute needed.
        if not user.existing and cls.LABEL != 'activitypub':
            key = RSA.generate(KEY_BITS,
                               randfunc=random.randbytes if DEBUG else None)
            user.mod = long_to_base64(key.n)
            user.public_exponent = long_to_base64(key.e)
            user.private_exponent = long_to_base64(key.d)

        try:
            user.put()
        except AssertionError as e:
            error(f'Bad {cls.__name__} id {id} : {e}')

        logger.debug(('Updated ' if user.existing else 'Created new ') + str(user))
        return user
370

371
    @property
    def obj(self):
        """Convenience accessor that loads :attr:`obj_key` from the datastore.

        The loaded entity is cached on this instance as ``_obj``.

        Returns:
          Object: or None if :attr:`obj_key` is unset
        """
        if not self.obj_key:
            return None

        if not hasattr(self, '_obj'):
            self._obj = self.obj_key.get()

        return self._obj
378

379
    @obj.setter
    def obj(self, obj):
        """Sets both the cached profile object and :attr:`obj_key`.

        A falsy ``obj`` clears both. Otherwise ``obj`` must be an
        :class:`Object` that has a key.
        """
        if not obj:
            self._obj = self.obj_key = None
            return

        assert isinstance(obj, Object)
        assert obj.key
        self._obj = obj
        self.obj_key = obj.key
388

389
    def delete(self, proto=None):
        """Deletes a user's bridged actors in all protocols or a specific one.

        Stores a synthetic AS1 ``delete`` activity, with this user as both
        actor and object, and then delivers it.

        Args:
          proto (Protocol): optional. If omitted, the delete id is labeled
            ``all`` and ``to_proto=None`` is passed to ``deliver``.
        """
        timestamp = util.now().isoformat()
        label = 'all' if not proto else proto.LABEL
        delete_id = f'{self.profile_id()}#delete-user-{label}-{timestamp}'

        user_id = self.key.id()
        activity = {
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': user_id,
            'object': user_id,
        }

        delete = Object(id=delete_id, source_protocol=self.LABEL,
                        our_as1=activity)
        delete.put()
        self.deliver(delete, from_user=self, to_proto=proto)
407

408
    @classmethod
    def load_multi(cls, users):
        """Loads :attr:`obj` for multiple users in parallel.

        Fetches all of the users' profile objects with a single
        ``ndb.get_multi`` call and caches each one on its user as ``_obj``.
        Users whose profile object wasn't found get ``_obj = None``.

        Args:
          users (sequence of User)
        """
        keys = [user.obj_key for user in users if user.obj_key]

        found = {}
        for obj in ndb.get_multi(keys):
            if obj:
                found[obj.key] = obj

        for user in users:
            user._obj = found.get(user.obj_key)
420

421
    @ndb.ComputedProperty
    def handle(self):
        """This user's unique, human-chosen handle, eg ``@me@snarfed.org``.

        Abstract here; subclasses must implement it.

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
428

429
    @ndb.ComputedProperty
    def readable_id(self):
        """DEPRECATED: replaced by :attr:`handle`. Always None.

        Kept for backward compatibility.
        """
        return None
433

434
    @ndb.ComputedProperty
    def status(self):
        """Whether this user is blocked or opted out.

        Optional. Current possible values:
          * ``opt-out``: if ``#nobridge`` or ``#nobot`` is in the profile
            description/bio, or if the user or domain has manually opted out.
            Some protocols also have protocol-specific opt out logic, eg Bluesky
            accounts that have disabled logged out view.
          * ``blocked``: if the user fails our validation checks, eg
            ``REQUIRES_NAME`` or ``REQUIRES_AVATAR`` if either of those are
            ``True`` for this protocol.
          * ``owns-webfinger``: a :class:`web.Web` user that looks like a
            fediverse server
          * ``no-feed-or-webmention``: a :class:`web.Web` user that doesn't have
            an RSS or Atom feed or webmention endpoint and has never sent us a
            webmention

        This base implementation only returns ``opt-out``, ``blocked``, or
        None; the other values presumably come from subclass overrides.

        Duplicates ``util.is_opt_out`` in Bridgy!

        https://github.com/snarfed/bridgy-fed/issues/666
        """
        # manual opt out wins over everything else, including enabled_protocols
        if self.manual_opt_out:
            return 'opt-out'

        # no profile to inspect, so nothing else to check
        if not self.obj or not self.obj.as1:
            return None

        if self.REQUIRES_AVATAR and not self.obj.as1.get('image'):
            return 'blocked'

        # a display name that's just the handle or id doesn't count as a name
        name = self.obj.as1.get('displayName')
        if self.REQUIRES_NAME and (not name or name in (self.handle, self.key.id())):
            return 'blocked'

        # only enforced when the profile has a published timestamp
        if self.REQUIRES_OLD_ACCOUNT:
            if published := self.obj.as1.get('published'):
                if util.now() - util.parse_iso8601(published) < OLD_ACCOUNT_AGE:
                    return 'blocked'

        summary = html_to_text(self.obj.as1.get('summary', ''), ignore_links=True)
        # re-read with a '' default so that the `in` checks below work when
        # displayName is missing
        name = self.obj.as1.get('displayName', '')

        # #nobridge overrides enabled_protocols
        if '#nobridge' in summary or '#nobridge' in name:
            return 'opt-out'

        # user has explicitly opted in. should go after spam filter (REQUIRES_*)
        # checks, but before is_public and #nobot
        if self.enabled_protocols:
            return None

        if not as1.is_public(self.obj.as1, unlisted=False):
            return 'opt-out'

        # enabled_protocols overrides #nobot
        if '#nobot' in summary or '#nobot' in name:
            return 'opt-out'
492

493
    def is_enabled(self, to_proto, explicit=False):
        """Returns True if this user can be bridged to a given protocol.

        Reasons this might return False:
        * We haven't turned on bridging these two protocols yet.
        * The user is opted out or blocked.
        * The user is on a domain that's opted out or blocked.
        * The from protocol requires opt in, and the user hasn't opted in.
        * ``explicit`` is True, and this protocol supports ``to_proto`` by
          default, but the user hasn't explicitly opted into it.

        Checks are applied in this order, first match wins:

        1. a user's own protocol is always enabled
        2. users for which ``Protocol.for_bridgy_subdomain`` returns a protocol
           are enabled for every protocol except that one
        3. :attr:`manual_opt_out` disables everything
        4. protocols in :attr:`enabled_protocols` are enabled, even if
           :attr:`status` is set
        5. a set :attr:`status` disables everything else
        6. protocols in ``DEFAULT_ENABLED_PROTOCOLS`` are enabled unless
           ``explicit`` is True

        Args:
          to_proto (Protocol subclass)
          explicit (bool): whether to require explicit opt in, ie ignore
            ``DEFAULT_ENABLED_PROTOCOLS``

        Returns:
          bool: whether this user can be bridged to ``to_proto``
        """
        # imported here rather than at module level, presumably to avoid a
        # circular import
        from protocol import Protocol
        assert issubclass(to_proto, Protocol)

        if self.__class__ == to_proto:
            return True

        to_label = to_proto.LABEL

        if bot_protocol := Protocol.for_bridgy_subdomain(self.key.id()):
            return to_proto != bot_protocol

        elif self.manual_opt_out:
            return False

        elif to_label in self.enabled_protocols:
            return True

        elif self.status:
            return False

        elif to_label in self.DEFAULT_ENABLED_PROTOCOLS and not explicit:
            return True

        return False
536

537
    def enable_protocol(self, to_proto):
1✔
538
        """Adds ``to_proto` to :attr:`enabled_protocols`.
539

540
        Also sends a welcome DM to the user (via a send task) if their protocol
541
        supports DMs.
542

543
        Args:
544
          to_proto (:class:`protocol.Protocol` subclass)
545
        """
546
        added = False
1✔
547

548
        if to_proto.LABEL in ids.COPIES_PROTOCOLS:
1✔
549
            # do this even if there's an existing copy since we might need to
550
            # reactivate it, which create_for should do
551
            to_proto.create_for(self)
1✔
552

553
        @ndb.transactional()
1✔
554
        def enable():
1✔
555
            user = self.key.get()
1✔
556
            if to_proto.LABEL not in user.enabled_protocols:
1✔
557
                user.enabled_protocols.append(to_proto.LABEL)
1✔
558
                util.add(user.sent_dms, DM(protocol=to_proto.LABEL, type='welcome'))
1✔
559
                user.put()
1✔
560
                nonlocal added
561
                added = True
1✔
562

563
            return user
1✔
564

565
        new_self = enable()
1✔
566
        # populate newly enabled protocol in this instance
567
        self.enabled_protocols = new_self.enabled_protocols
1✔
568
        self.copies = new_self.copies
1✔
569
        if self.obj:
1✔
570
            self.obj.copies = new_self.obj.copies
1✔
571

572
        if added:
1✔
573
            import dms
1✔
574
            dms.maybe_send(from_proto=to_proto, to_user=self, type='welcome',
1✔
575
                           text=f"""\
576
Welcome to Bridgy Fed! Your account will soon be bridged to {to_proto.PHRASE} at {self.user_link(proto=to_proto, name=False)}. <a href="https://fed.brid.gy/docs">See the docs</a> and <a href="https://{common.PRIMARY_DOMAIN}{self.user_page_path()}">your user page</a> for more information. To disable this and delete your bridged profile, block this account.""")
577

578
        msg = f'Enabled {to_proto.LABEL} for {self.key.id()} : {self.user_page_path()}'
1✔
579
        logger.info(msg)
1✔
580

581
    def disable_protocol(self, to_proto):
        """Removes ``to_proto`` from :attr:`enabled_protocols`.

        Updates the stored entity inside a transaction, then applies the same
        removal to this in-memory instance.

        Args:
          to_proto (:class:`protocol.Protocol` subclass)
        """
        label = to_proto.LABEL

        @ndb.transactional()
        def disable():
            stored = self.key.get()
            util.remove(stored.enabled_protocols, label)
            stored.put()

        disable()
        util.remove(self.enabled_protocols, label)

        logger.info(
            f'Disabled {label} for {self.key.id()} : {self.user_page_path()}')
598

599
    def handle_as(self, to_proto):
        """Returns this user's handle in a different protocol.

        Args:
          to_proto (str or Protocol): protocol class, or a label that's a key
            in ``PROTOCOLS``

        Returns:
          str: or None if this user has no handle to translate
        """
        if isinstance(to_proto, str):
            to_proto = PROTOCOLS[to_proto]

        # override to-ATProto to use custom domain handle in DID doc
        from atproto import ATProto, did_to_handle
        if to_proto == ATProto:
            did = self.get_copy(ATProto)
            custom = did_to_handle(did, remote=False) if did else None
            if custom:
                return custom

        # override web users to always use domain instead of custom username
        # TODO: fall back to id if handle is unset?
        if self.LABEL == 'web':
            native = self.key.id()
        else:
            native = self.handle

        if not native:
            return None

        return ids.translate_handle(handle=native, from_=self.__class__,
                                    to=to_proto, enhanced=False)
626

627
    def id_as(self, to_proto):
        """Returns this user's id in a different protocol.

        Args:
          to_proto (str or Protocol): protocol class, or a label that's a key
            in ``PROTOCOLS``

        Returns:
          str: the result of ``ids.translate_user_id`` for this user's key id
        """
        target = PROTOCOLS[to_proto] if isinstance(to_proto, str) else to_proto
        return ids.translate_user_id(
            id=self.key.id(), from_=self.__class__, to=target)
641

642
    def handle_or_id(self):
        """Returns :attr:`handle` if it's set, otherwise this user's key id."""
        handle = self.handle
        if handle:
            return handle
        return self.key.id()
645

646
    def public_pem(self):
        """Exports this user's RSA public key in PEM format.

        The key is constructed from :attr:`mod` and :attr:`public_exponent`.

        Returns:
          bytes:
        """
        modulus = base64_to_long(str(self.mod))
        exponent = base64_to_long(str(self.public_exponent))
        return RSA.construct((modulus, exponent)).exportKey(format='PEM')
654

655
    def private_pem(self):
        """Exports this user's RSA private key in PEM format.

        The key is constructed from :attr:`mod`, :attr:`public_exponent`, and
        :attr:`private_exponent`, all of which must be set.

        Returns:
          bytes:
        """
        assert self.mod and self.public_exponent and self.private_exponent, str(self)

        modulus = base64_to_long(str(self.mod))
        public = base64_to_long(str(self.public_exponent))
        private = base64_to_long(str(self.private_exponent))
        return RSA.construct((modulus, public, private)).exportKey(format='PEM')
665

666
    def name(self):
        """Returns this user's human-readable name, eg ``Ryan Barrett``.

        Uses the profile object's AS1 ``displayName`` if it's set, otherwise
        falls back to :meth:`handle_or_id`.
        """
        profile = self.obj
        if profile and profile.as1:
            display_name = profile.as1.get('displayName')
            if display_name:
                return display_name

        return self.handle_or_id()
674

675
    def web_url(self):
        """Returns this user's web URL (homepage), eg ``https://foo.com/``.

        Abstract here; subclasses must implement it.

        Returns:
          str

        Raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
684

685
    def is_web_url(self, url, ignore_www=False):
        """Returns True if the given URL is this user's web URL (homepage).

        Before comparing, surrounding whitespace and trailing slashes are
        stripped from ``url``, and a leading ``www.`` is dropped from both
        URLs. ``http`` vs ``https`` is ignored, and ``url`` may also be a bare
        domain, without scheme.

        Args:
          url (str)
          ignore_www (bool): currently unused; a leading ``www.`` is always
            ignored, regardless of this value

        Returns:
          bool:
        """
        if not url:
            return False

        def drop_www(val):
            return re.sub(r'^(https?://)www\.', r'\1', val)

        theirs = drop_www(url.strip().rstrip('/'))
        theirs_parts = urlparse(theirs)
        if theirs_parts.scheme not in ('http', 'https', ''):
            return False

        ours = drop_www(self.web_url().rstrip('/'))
        ours_parts = urlparse(ours)

        if theirs == ours or theirs == ours_parts.netloc:
            return True

        # ignore http vs https
        return theirs_parts[1:] == ours_parts[1:]
710

711
    def id_uri(self):
        """Returns the user id as a URI.

        Sometimes this is the user id itself, eg ActivityPub actor ids.
        Sometimes it's a bit different, eg at://did:plc:... for ATProto user,
        https://site.com for Web users.

        This base implementation returns the key id unchanged.

        Returns:
          str
        """
        user_id = self.key.id()
        return user_id
722

723
    def profile_id(self):
        """Returns the id of this user's profile object in its native protocol.

        Examples:

        * Web: home page URL, eg ``https://me.com/``
        * ActivityPub: actor URL, eg ``https://instance.com/users/me``
        * ATProto: profile AT URI, eg ``at://did:plc:123/app.bsky.actor.profile/self``

        Delegates to ``ids.profile_id`` with this user's key id.

        Returns:
          str or None:
        """
        user_id = self.key.id()
        return ids.profile_id(id=user_id, proto=self)
738

739
    def reload_profile(self, **kwargs):
        """Reloads this user's identity and profile from their native protocol.

        Populates the reloaded profile :class:`Object` in ``self.obj``. If the
        load returns nothing, ``self.obj`` is left as is.

        Args:
          kwargs: passed through to :meth:`Protocol.load`
        """
        profile = self.load(self.profile_id(), remote=True, **kwargs)
        if not profile:
            return

        self.obj = profile
750

751
    def user_page_path(self, rest=None):
        """Returns the user's Bridgy Fed user page path.

        Args:
          rest (str): optional suffix. Joined with a ``/`` unless it starts
            with ``?``, ie is a query string.

        Returns:
          str: ``/[ABBREV]/[handle or id]``, plus ``rest`` if provided
        """
        base = f'/{self.ABBREV}/{self.handle_or_id()}'
        if not rest:
            return base

        separator = '' if rest.startswith('?') else '/'
        return base + separator + rest
761

762
    def get_copy(self, proto):
        """Returns the id for the copy of this user in a given protocol.

        ...or None if no such copy exists. If ``proto`` is this user's own
        protocol, returns this user's key id.

        Args:
          proto: :class:`Protocol` subclass

        Returns:
          str: or None
        """
        # don't use isinstance because the testutil Fake protocol has subclasses
        if self.LABEL == proto.LABEL:
            return self.key.id()

        # copies may be stored under either the protocol's label or its abbrev
        labels = (proto.LABEL, proto.ABBREV)
        return next(
            (target.uri for target in self.copies if target.protocol in labels),
            None)
781

782
    def user_link(self, name=True, handle=True, pictures=False, proto=None,
                  proto_fallback=False):
        """Returns a pretty HTML link to the user's profile.

        Can optionally include display name, handle, profile
        picture, and/or link to a different protocol that they've enabled.

        The display name, handle, profile URL, and picture URL all come from
        the user's profile, so they're HTML-escaped before being interpolated
        into the returned markup.

        TODO: unify with :meth:`Object.actor_link`?

        Args:
          name (bool): include display name
          handle (bool): include handle
          pictures (bool): include profile picture and protocol logo
          proto (protocol.Protocol): link to this protocol instead of the user's
            native protocol. Must be enabled for this user.
          proto_fallback (bool): if True, and ``proto`` is provided and has
            no canonical profile URL for bridged users, uses the user's profile
            URL in their native protocol

        Returns:
          str: HTML
        """
        from html import escape

        img = name_str = handle_str = dot = logo = a_open = a_close = ''

        if proto:
            assert self.is_enabled(proto), f"{proto.LABEL} isn't enabled"
            url = proto.bridged_web_url_for(self, fallback=proto_fallback)
        else:
            proto = self.__class__
            url = self.web_url()

        if pictures:
            logo = f'<span class="logo" title="{proto.__name__}">{proto.LOGO_HTML}</span> '
            if pic := self.profile_picture():
                img = f'<img src="{escape(pic)}" class="profile"> '

        if handle:
            handle_str = self.handle_as(proto) or ''

        # omit the name if it would just duplicate the handle
        if name and self.name() != handle_str:
            name_str = self.name() or ''

        if handle_str and name_str:
            dot = ' &middot; '

        if url:
            # dot is our own HTML entity, so it's deliberately not escaped
            a_open = f'<a class="h-card u-author" rel="me" href="{escape(url)}" title="{escape(name_str)}{dot}{escape(handle_str)}">'
            a_close = '</a>'

        # escape after ellipsizing so that truncation can't split an entity
        name_html = f'<span style="unicode-bidi: isolate">{escape(ellipsize(name_str, chars=40))}</span>' if name_str else ''
        return f'{logo}{a_open}{img}{name_html}{dot}{escape(ellipsize(handle_str, chars=40))}{a_close}'
830

831
    def profile_picture(self):
1✔
832
        """Returns the user's profile picture image URL, if available, or None."""
833
        if self.obj and self.obj.as1:
1✔
834
            return util.get_url(self.obj.as1, 'image')
1✔
835

836
    # can't use functools.lru_cache here because we want the cache key to be
837
    # just the user id, not the whole entity
838
    @cachetools.cached(
        cachetools.TTLCache(50000, FOLLOWERS_CACHE_EXPIRATION.total_seconds()),
        key=lambda user: user.key.id(), lock=Lock())
    @memcache.memoize(key=lambda self: self.key.id(),
                      expire=FOLLOWERS_CACHE_EXPIRATION)
    def count_followers(self):
        """Counts this user's followers and followings.

        Only counts :class:`Follower`\\s with status ``active``. Both count
        queries are started asynchronously before either result is awaited.
        Results are cached by user id, both in memory and via
        ``memcache.memoize``, for ``FOLLOWERS_CACHE_EXPIRATION``.

        Returns:
          (int, int) tuple: (number of followers, number following)
        """
        if self.key.id() in PROTOCOL_DOMAINS:
            # we don't store Followers for protocol bot users any more, so
            # follower counts are inaccurate, so don't return them
            return (0, 0)

        followers_future = Follower.query(
            Follower.to == self.key,
            Follower.status == 'active',
        ).count_async()
        following_future = Follower.query(
            Follower.from_ == self.key,
            Follower.status == 'active',
        ).count_async()

        followers = followers_future.get_result()
        following = following_future.get_result()
        return followers, following
861

862

863
class Object(StringIdModel):
1✔
864
    """An activity or other object, eg actor.
865

866
    Key name is the id. We synthesize ids if necessary.
867
    """
868
    LABELS = ('activity',
1✔
869
              # DEPRECATED, replaced by users, notify, feed
870
              'feed', 'notification', 'user')
871

872
    # Keys for user(s) who created or otherwise own this activity.
873
    users = ndb.KeyProperty(repeated=True)
1✔
874
    # User keys who should see this activity in their user page, eg in reply to,
875
    # reaction to, share of, etc.
876
    notify = ndb.KeyProperty(repeated=True)
1✔
877
    # User keys who should see this activity in their feeds, eg followers of its
878
    # creator
879
    feed = ndb.KeyProperty(repeated=True)
1✔
880

881
    # DEPRECATED but still used read only to maintain backward compatibility
882
    # with old Objects in the datastore that we haven't bothered migrating.
883
    domains = ndb.StringProperty(repeated=True)
1✔
884

885
    # choices is populated in reset_protocol_properties, after all User
886
    # subclasses are created, so that PROTOCOLS is fully populated.
887
    # TODO: nail down whether this is ABBREV or LABEL
888
    source_protocol = ndb.StringProperty(choices=list(PROTOCOLS.keys()))
1✔
889
    labels = ndb.StringProperty(repeated=True, choices=LABELS)
1✔
890

891
    # TODO: switch back to ndb.JsonProperty if/when they fix it for the web console
892
    # https://github.com/googleapis/python-ndb/issues/874
893
    as2 = JsonProperty()      # only one of the rest will be populated...
1✔
894
    bsky = JsonProperty()     # Bluesky / AT Protocol
1✔
895
    mf2 = JsonProperty()      # HTML microformats2 item (ie _not_ the top level
1✔
896
                              # parse object with items inside an 'items' field)
897
    our_as1 = JsonProperty()  # AS1 for activities that we generate or modify ourselves
1✔
898
    raw = JsonProperty()      # other standalone data format, eg DID document
1✔
899

900
    # these are full feeds with multiple items, not just this one, so they're
901
    # stored as audit records only. they're not used in to_as1. for Atom/RSS
902
    # based Objects, our_as1 will be populated with a feed_index top-level
903
    # integer field that indexes into one of these.
904
    atom = ndb.TextProperty() # Atom XML
1✔
905
    rss = ndb.TextProperty()  # RSS XML
1✔
906

907
    deleted = ndb.BooleanProperty()
1✔
908

909
    # Copies of this object elsewhere, eg at:// URIs for ATProto records and
910
    # nevent etc bech32-encoded Nostr ids, where this object is the original.
911
    # Similar to u-syndication links in microformats2 and
912
    # upstream/downstreamDuplicates in AS1.
913
    copies = ndb.StructuredProperty(Target, repeated=True)
1✔
914

915
    created = ndb.DateTimeProperty(auto_now_add=True)
1✔
916
    updated = ndb.DateTimeProperty(auto_now=True)
1✔
917

918
    new = None
1✔
919
    changed = None
1✔
920
    """Protocol and subclasses set these in fetch if this :class:`Object` is
1✔
921
    new or if its contents have changed from what was originally loaded from the
922
    datastore. If either one is None, that means we don't know whether this
923
    :class:`Object` is new/changed.
924

925
    :attr:`changed` is populated by :meth:`activity_changed()`.
926
    """
927

928
    lock = None
1✔
929
    """Initialized in __init__, synchronizes :meth:`add` and :meth:`remove`."""
1✔
930

931
    # these were used for delivery tracking, but they were too expensive,
932
    # so we stopped: https://github.com/snarfed/bridgy-fed/issues/1501
933
    STATUSES = ('new', 'in progress', 'complete', 'failed', 'ignored')
1✔
934
    status = ndb.StringProperty(choices=STATUSES)
1✔
935
    delivered = ndb.StructuredProperty(Target, repeated=True)
1✔
936
    undelivered = ndb.StructuredProperty(Target, repeated=True)
1✔
937
    failed = ndb.StructuredProperty(Target, repeated=True)
1✔
938

939
    @property
    def as1(self):
        """This object's contents, converted to AS1.

        Uses the first populated property out of ``our_as1``, ``as2``,
        ``bsky``, and ``mf2``, in that order. If the result has no ``id`` and
        this entity has a key, the key id is filled in.

        Returns:
          dict: AS1 object or activity, or None if none of those properties is
          populated or the ``bsky`` record couldn't be converted
        """
        def use_urls_as_ids(obj):
            """If id field is missing or not a URL, use the url field.

            Also recurses into the ``author``, ``actor``, and ``object`` fields.
            """
            obj_id = obj.get('id')
            id_ok = obj_id and (util.is_web(obj_id)
                                or re.match(DOMAIN_RE, obj_id))
            if not id_ok:
                url = util.get_url(obj)
                if url:
                    obj['id'] = url

            for field in ('author', 'actor', 'object'):
                inner = as1.get_object(obj, field)
                if inner:
                    use_urls_as_ids(inner)

        if self.our_as1:
            converted = self.our_as1
            # only Atom/RSS based objects get their ids rewritten from URLs
            if self.atom or self.rss:
                use_urls_as_ids(converted)

        elif self.as2:
            converted = as2.to_as1(unwrap(self.as2))

        elif self.bsky:
            key_id = self.key.id()
            owner, _, _ = parse_at_uri(key_id)
            ATProto = PROTOCOLS['atproto']
            handle = ATProto(id=owner).handle
            try:
                converted = bluesky.to_as1(
                    self.bsky, repo_did=owner, repo_handle=handle, uri=key_id,
                    pds=ATProto.pds_for(self))
            except (ValueError, RequestException):
                logger.info("Couldn't convert to ATProto", exc_info=True)
                return None

        elif self.mf2:
            converted = microformats2.json_to_object(
                self.mf2, rel_urls=self.mf2.get('rel-urls'))
            use_urls_as_ids(converted)

            # use fetched final URL as id, not u-url
            # https://github.com/snarfed/bridgy-fed/issues/829
            url = self.mf2.get('url')
            if url:
                if self.key and '#' in self.key.id():
                    converted['id'] = self.key.id()
                else:
                    converted['id'] = url

        else:
            return None

        # populate id if necessary
        if self.key:
            converted.setdefault('id', self.key.id())

        return converted
990

991
    @ndb.ComputedProperty
    def type(self):  # AS1 objectType, or verb if it's an activity
        """Returns the type of this object's AS1, or None if it has no AS1."""
        converted = self.as1
        return as1.object_type(converted) if converted else None
995

996
    def __init__(self, *args, **kwargs):
        """Creates the entity, then gives it its own lock.

        The lock is what :meth:`add` and :meth:`remove` hold while they modify
        a property's values. All arguments are passed through to the
        superclass constructor.
        """
        super().__init__(*args, **kwargs)
        self.lock = Lock()
999

1000
    def _expire(self):
        """Computes when this Object should be deleted by the TTL policy.

        https://cloud.google.com/datastore/docs/ttl

        They recommend not indexing TTL properties:
        https://cloud.google.com/datastore/docs/ttl#ttl_properties_and_indexes

        Returns:
          ``updated`` (or the current time, if ``updated`` isn't set yet) plus
          ``OBJECT_EXPIRE_AGE``, or None if this object's type is in
          ``DONT_EXPIRE_OBJECT_TYPES``
        """
        if self.type in DONT_EXPIRE_OBJECT_TYPES:
            return None

        last_touched = self.updated or util.now()
        return last_touched + OBJECT_EXPIRE_AGE

    expire = ndb.ComputedProperty(_expire, indexed=False)
1012

1013
    def _pre_put_hook(self):
        """Validates and tidies this entity right before it's stored.

        * Check that ``source_protocol``, if set, doesn't disown the key id
        * Validate that at:// URIs have DID repos
        * Set/remove the activity label
        * Strip @context from as2 (we don't do LD) to save disk space

        Raises:
          AssertionError: if ``source_protocol``'s ``owns_id`` returns False
            for the key id
          ValueError: if the key id is an at:// URI whose repo isn't a DID
        """
        key_id = self.key.id()

        if self.source_protocol not in (None, 'ui'):
            proto = PROTOCOLS[self.source_protocol]
            assert proto.owns_id(key_id) is not False, \
                f'Protocol {proto.LABEL} does not own id {key_id}'

        if key_id.startswith('at://'):
            repo, _, _ = parse_at_uri(key_id)
            if not repo.startswith('did:'):
                # TODO: if we hit this, that means the AppView gave us an AT URI
                # with a handle repo/authority instead of DID. that's surprising!
                # ...if so, and if we need to handle it, add a new
                # arroba.did.canonicalize_at_uri() function, then use it here,
                # or before.
                raise ValueError(
                    f'at:// URI ids must have DID repos; got {key_id}')

        # keep the 'activity' label in sync with the AS1 objectType
        converted = self.as1
        if converted and converted.get('objectType') == 'activity':
            self.add('labels', 'activity')
        elif 'activity' in self.labels:
            self.remove('labels', 'activity')

        if not self.as2:
            return

        # drop @context from the top level and from directly nested objects
        self.as2.pop('@context', None)
        for field in ('actor', 'attributedTo', 'author', 'object'):
            for nested in util.get_list(self.as2, field):
                if isinstance(nested, dict):
                    nested.pop('@context', None)
1048

1049
    def _post_put_hook(self, future):
        """Logs this entity's key after a put.

        Args:
          future: unused
        """
        # TODO: assert that as1 id is same as key id? in pre put hook?
        logger.debug(f'Wrote {self.key}')
1052

1053
    @classmethod
    def get_or_create(cls, id, authed_as=None, **props):
        """Returns an :class:`Object` with the given property values.

        If a matching :class:`Object` doesn't exist in the datastore, creates it
        first. Only populates non-False/empty property values in props into the
        object. Also populates the :attr:`new` and :attr:`changed` properties;
        ``new`` and ``changed`` values in ``props`` override the computed ones.

        If ``props`` includes any of ``as2``, ``bsky``, ``mf2``, or ``raw``,
        the existing ``our_as1`` is cleared first.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for anything, last
        writer wins is pretty much always fine.

        Args:
          id (str): key id of the object
          authed_as (str): if a matching :class:`Object` already exists, its
            `author` or `actor` must contain this actor id. Implements basic
            authorization for updates and deletes.
          props: property values to populate

        Returns:
          Object: stored, with ``new`` and ``changed`` set

        Raises:
          AssertionError: if the existing object has AS1 content and
            ``authed_as`` is empty or its ``source_protocol`` is unknown
        """
        # stays None when there's no existing entity, so that the
        # activity_changed call below is safe even if the caller passed
        # new=False for an object that isn't stored yet
        orig_as1 = None

        obj = cls.get_by_id(id)
        if obj:
            obj.new = False
            orig_as1 = obj.as1
            if orig_as1:
                # authorization: check that the authed user is allowed to modify
                # this object
                # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
                assert authed_as
                proto = PROTOCOLS.get(obj.source_protocol)
                assert proto, obj.source_protocol
                owners = [ids.normalize_user_id(id=owner, proto=proto)
                          for owner in (as1.get_ids(orig_as1, 'author')
                                        + as1.get_ids(orig_as1, 'actor')
                                        + [id])]
                if (ids.normalize_user_id(id=authed_as, proto=proto) not in owners
                        and ids.profile_id(id=authed_as, proto=proto) not in owners):
                    report_error("Auth: Object: authed_as doesn't match owner",
                                 user=f'{id} authed_as {authed_as} owners {owners}')
                    error(f"authed user {authed_as} isn't object owner {owners}",
                          status=403)
        else:
            obj = Object(id=id)
            obj.new = True

        # explicit new/changed values from the caller win over computed ones
        obj.changed = None
        for field in 'new', 'changed':
            val = props.pop(field, None)
            if val is not None:
                setattr(obj, field, val)

        # new protocol-native data replaces any AS1 we generated ourselves
        if props.keys() & {'as2', 'bsky', 'mf2', 'raw'}:
            obj.clear()
        obj.populate(**{
            k: v for k, v in props.items()
            if v and not isinstance(getattr(Object, k), ndb.ComputedProperty)
        })
        if not obj.new and obj.changed is None:
            obj.changed = obj.activity_changed(orig_as1)

        obj.put()
        return obj
1115

1116
    def add(self, prop, val):
        """Adds a value to a multiply-valued property. Uses ``self.lock``.

        When ``prop`` is ``copies``, also stores this object's key in memcache
        under the memoize key for ``get_original_object_key`` and the new
        copy's URI.

        Args:
          prop (str)
          val
        """
        with self.lock:
            values = getattr(self, prop)
            util.add(values, val)

        if prop != 'copies':
            return

        cache_key = memcache.memoize_key(get_original_object_key, val.uri)
        memcache.pickle_memcache.set(cache_key, self.key)
1129

1130
    def remove(self, prop, val):
        """Removes a value from a multiply-valued property. Uses ``self.lock``.

        Uses the property value's own ``remove`` method, so whatever that
        raises when ``val`` isn't present propagates to the caller.

        Args:
          prop (str)
          val
        """
        with self.lock:
            values = getattr(self, prop)
            values.remove(val)
1139

1140
    def clear(self):
        """Resets :attr:`Object.our_as1` to None.

        No other property is touched.
        """
        self.our_as1 = None
1143

1144
    def activity_changed(self, other_as1):
        """Returns True if this activity is meaningfully changed from ``other_as1``.

        ...otherwise False. If only one of the two is populated, that counts
        as changed; if neither is, it doesn't.

        Used to populate :attr:`changed`.

        Args:
          other_as1 (dict): AS1 object, or none
        """
        if self.as1 and other_as1:
            # ignore inReplyTo since we translate it between protocols
            return as1.activity_changed(self.as1, other_as1, inReplyTo=False)

        return bool(self.as1) != bool(other_as1)
1158

1159
    def actor_link(self, image=True, sized=False, user=None):
        """Returns a pretty HTML link with the actor's name and picture.

        The actor is this object's AS1 ``actor``, or ``author`` if there's no
        actor. If it only has an id, it's hydrated from the stored actor
        object for this object's source protocol, if there is one.

        Actor-supplied values (name, URLs) are HTML-escaped before they're
        interpolated into the markup built here.

        TODO: unify with :meth:`User.user_link`?

        Args:
          image (bool): whether to include an ``img`` tag with the actor's picture
          sized (bool): whether to set an explicit (``width=32``) size on the
            profile picture ``img`` tag
          user (User): current user

        Returns:
          str: HTML, or the empty string if this object has no actor or author
        """
        from html import escape

        attrs = {'class': 'h-card u-author'}

        if user and (user.key in self.users or user.key.id() in self.domains):
            # outbound; show a nice link to the user
            return user.user_link(handle=False, pictures=True)

        proto = PROTOCOLS.get(self.source_protocol)

        actor = None
        if self.as1:
            actor = (as1.get_object(self.as1, 'actor')
                     or as1.get_object(self.as1, 'author'))
            # hydrate from datastore if available. check proto itself, not
            # source_protocol, since the label may not map to a protocol class
            # TODO: optimize! this is called serially in loops, eg in home.html
            if set(actor.keys()) == {'id'} and proto:
                actor_obj = proto.load(actor['id'], remote=False)
                if actor_obj and actor_obj.as1:
                    actor = actor_obj.as1

        if not actor:
            return ''
        elif set(actor.keys()) == {'id'}:
            return common.pretty_link(actor['id'], attrs=attrs, user=user)

        url = as1.get_url(actor)
        name = actor.get('displayName') or actor.get('username') or ''
        img_url = util.get_url(actor, 'image')
        if not image or not img_url:
            return common.pretty_link(url, text=name, attrs=attrs, user=user)

        logo = ''
        if proto:
            # title is the protocol's class name; self is always an Object here
            logo = f'<span class="logo" title="{proto.__name__}">{proto.LOGO_HTML}</span>'

        # these values come from remote actors, so escape them for HTML
        safe_url = escape(url or '')
        safe_name = escape(name)
        safe_img_url = escape(img_url)
        safe_short_name = escape(util.ellipsize(name, chars=40))

        return f"""\
        {logo}
        <a class="h-card u-author" href="{safe_url}" title="{safe_name}">
          <img class="profile" src="{safe_img_url}" {'width="32"' if sized else ''}/>
          <span style="unicode-bidi: isolate">{safe_short_name}</span>
        </a>"""
1213

1214
    def get_copy(self, proto):
        """Finds the id of this object's copy in a given protocol.

        If ``proto`` is this object's ``source_protocol``, that's this
        object's own key id. Otherwise, it's the URI of the first element in
        :attr:`copies` for that protocol. Protocols are matched by either
        ``LABEL`` or ``ABBREV``.

        Args:
          proto: :class:`Protocol` subclass

        Returns:
          str: id, or None if no such copy exists
        """
        proto_names = (proto.LABEL, proto.ABBREV)
        if self.source_protocol in proto_names:
            return self.key.id()

        return next((target.uri for target in self.copies
                     if target.protocol in proto_names), None)
1232

1233
    def resolve_ids(self):
        """Resolves "copy" ids, subdomain ids, etc with their originals.

        The end result is that all ids are original "source" ids, ie in the
        protocol that they first came from.

        Specifically, resolves:

        * ids in :class:`User.copies` and :class:`Object.copies`, eg ATProto
          records and Nostr events that we bridged, to the ids of their
          original objects in their source protocol, eg
          ``at://did:plc:abc/app.bsky.feed.post/123`` => ``https://mas.to/@user/456``.
        * Bridgy Fed subdomain URLs to the ids embedded inside them, eg
          ``https://bsky.brid.gy/ap/did:plc:xyz`` => ``did:plc:xyz``
        * ATProto bsky.app URLs to their DIDs or `at://` URIs, eg
          ``https://bsky.app/profile/a.com`` => ``did:plc:123``

        ...in these AS1 fields, in place:

        * ``id``
        * ``actor``
        * ``author``
        * ``object``
        * ``object.actor``
        * ``object.author``
        * ``object.id``
        * ``object.inReplyTo``
        * ``tags.[objectType=mention].url``

        The result is stored in :attr:`our_as1`. Does nothing if this object
        has no AS1; only unwraps subdomain URLs if ``source_protocol`` isn't a
        known protocol.

        :meth:`protocol.Protocol.translate_ids` is partly the inverse of this.
        Much of the same logic is duplicated there!

        TODO: unify with :meth:`normalize_ids`,
        :meth:`protocol.Protocol.translate_ids`.
        """
        if not self.as1:
            return

        # extract ids, strip Bridgy Fed subdomain URLs
        outer_obj = unwrap(self.as1)
        if outer_obj != self.as1:
            self.our_as1 = util.trim_nulls(outer_obj)

        self_proto = PROTOCOLS.get(self.source_protocol)
        if not self_proto:
            return

        inner_obj = outer_obj['object'] = as1.get_object(outer_obj)
        replaced = False

        def replace(val, orig_fn):
            """Returns val with its id swapped for the original's, if any.

            Args:
              val (dict or str): object or id
              orig_fn (callable): takes a str id, returns the original's key
                or None

            Returns:
              dict or str: ``val`` itself, modified in place, if it's a dict
              with fields other than ``id``; otherwise the original id.
              ``val`` unchanged if it has no id or no original was found.
            """
            val_id = val.get('id') if isinstance(val, dict) else val
            if not val_id:
                # nothing to resolve. return the value itself, not its (empty)
                # id, so that objects without ids aren't dropped
                return val

            orig = orig_fn(val_id)
            if not orig:
                return val

            nonlocal replaced
            replaced = True
            logger.debug(f'Resolved copy id {val} to original {orig.id()}')

            if isinstance(val, dict) and util.trim_nulls(val).keys() > {'id'}:
                val['id'] = orig.id()
                return val
            else:
                return orig.id()

        # actually replace ids
        #
        # object field could be either object (eg repost) or actor (eg follow)
        outer_obj['object'] = replace(inner_obj, get_original_object_key)
        if not replaced:
            outer_obj['object'] = replace(inner_obj, get_original_user_key)

        for obj in outer_obj, inner_obj:
            for tag in as1.get_objects(obj, 'tags'):
                if tag.get('objectType') == 'mention':
                    tag['url'] = replace(tag.get('url'), get_original_user_key)
            for field, fn in (
                    ('actor', get_original_user_key),
                    ('author', get_original_user_key),
                    ('inReplyTo', get_original_object_key),
                ):
                obj[field] = [replace(val, fn) for val in util.get_list(obj, field)]
                if len(obj[field]) == 1:
                    obj[field] = obj[field][0]

        if replaced:
            self.our_as1 = util.trim_nulls(outer_obj)
1323

1324
    def normalize_ids(self):
        """Normalizes ids to their protocol's canonical representation, if any.

        For example, normalizes ATProto ``https://bsky.app/...`` URLs to DIDs
        for profiles, ``at://`` URIs for posts.

        Works on a deep copy of this object's AS1 and only stores it, in
        :attr:`our_as1`, if at least one id actually changed. Does nothing if
        this object has no AS1.

        TODO: unify with :meth:`resolve_ids`, :meth:`Protocol.translate_ids`.
        """
        from protocol import Protocol

        if not self.as1:
            return

        logger.debug('Normalizing ids')
        outer_obj = copy.deepcopy(self.as1)
        inner_objs = as1.get_objects(outer_obj)
        replaced = False

        def replace(val, translate_fn):
            """Returns val with its id normalized, or unchanged if it can't be.

            Translates the id from its own protocol to that same protocol. If
            val is a dict, its ``id`` is updated in place.
            """
            nonlocal replaced

            orig = val.get('id') if isinstance(val, dict) else val
            if not orig:
                return val

            proto = Protocol.for_id(orig, remote=False)
            if not proto:
                return val

            translated = translate_fn(id=orig, from_=proto, to=proto)
            if not translated or translated == orig:
                return val

            replaced = True
            if not isinstance(val, dict):
                return translated

            val['id'] = translated
            return val

        # which translate function applies to each id-bearing field
        field_fns = (
            ('actor', ids.translate_user_id),
            ('author', ids.translate_user_id),
            ('inReplyTo', ids.translate_object_id),
        )

        # actually replace ids
        for obj in [outer_obj, *inner_objs]:
            for tag in as1.get_objects(obj, 'tags'):
                if tag.get('objectType') == 'mention':
                    tag['url'] = replace(tag.get('url'), ids.translate_user_id)

            for field, fn in field_fns:
                vals = [replace(val, fn) for val in util.get_list(obj, field)]
                # collapse single-element lists back to a bare value
                obj[field] = vals[0] if len(vals) == 1 else vals

        normalized_objs = []
        outer_obj['object'] = normalized_objs
        for inner_obj in inner_objs:
            # the inner object is a user if it's an actor, or if we're
            # following or unfollowing it
            if (as1.object_type(inner_obj) in as1.ACTOR_TYPES
                    or as1.object_type(outer_obj) in ('follow', 'stop-following')):
                translate_fn = ids.translate_user_id
            else:
                translate_fn = ids.translate_object_id

            got = replace(inner_obj, translate_fn)
            if isinstance(got, dict) and util.trim_nulls(got).keys() == {'id'}:
                got = got['id']

            normalized_objs.append(got)

        if len(normalized_objs) == 1:
            outer_obj['object'] = normalized_objs[0]

        if replaced:
            self.our_as1 = util.trim_nulls(outer_obj)
1398

1399

1400
class Follower(ndb.Model):
    """A follower of a Bridgy Fed user.

    Connects two users, ``from_`` (the follower) and ``to`` (the followee),
    which must be of different kinds.
    """
    STATUSES = ('active', 'inactive')

    # these are both subclasses of User
    from_ = ndb.KeyProperty(name='from', required=True)
    to = ndb.KeyProperty(required=True)

    follow = ndb.KeyProperty(Object)  # last follow activity
    status = ndb.StringProperty(choices=STATUSES, default='active')

    created = ndb.DateTimeProperty(auto_now_add=True)
    updated = ndb.DateTimeProperty(auto_now=True)

    # OLD. some stored entities still have these; do not reuse.
    # src = ndb.StringProperty()
    # dest = ndb.StringProperty()
    # last_follow = JsonProperty()

    def _pre_put_hook(self):
        """Checks that ``from_`` and ``to`` are different kinds of users.

        Raises:
          AssertionError: if they're the same kind
        """
        # we're a bridge! stick with bridging.
        assert self.from_.kind() != self.to.kind(), f'from {self.from_} to {self.to}'

    def _post_put_hook(self, future):
        """Logs this entity's key after a put.

        Args:
          future: unused
        """
        logger.debug(f'Wrote {self.key}')

    @classmethod
    def get_or_create(cls, *, from_, to, **kwargs):
        """Returns a Follower with the given ``from_`` and ``to`` users.

        Not transactional because transactions don't read or write memcache. :/
        Fortunately we don't really depend on atomicity for anything, last
        writer wins is pretty much always fine.

        If a matching :class:`Follower` doesn't exist in the datastore, creates
        it first. If one does exist and ``kwargs`` is non-empty, sets those
        property values on it and stores it again.

        Args:
          from_ (User)
          to (User)
          kwargs: property values to set on the new or existing entity

        Returns:
          Follower:
        """
        assert from_
        assert to

        follower = Follower.query(Follower.from_ == from_.key,
                                  Follower.to == to.key,
                                  ).get()
        if not follower:
            follower = Follower(from_=from_.key, to=to.key, **kwargs)
            follower.put()
        elif kwargs:
            # update existing entity with new property values, eg to make an
            # inactive Follower active again
            for prop, val in kwargs.items():
                setattr(follower, prop, val)
            follower.put()

        return follower

    @staticmethod
    def fetch_page(collection, user):
        r"""Fetches a page of :class:`Follower`\s for a given user.

        Wraps :func:`fetch_page`. Paging uses the ``before`` and ``after`` query
        parameters, if available in the request.

        Only active followers are queried. Results whose other user no longer
        exists, or has a truthy ``status``, are left out.

        Args:
          collection (str): ``followers`` or ``following``
          user (User)

        Returns:
          (list of Follower, str, str) tuple: results, annotated with an extra
          ``user`` attribute that holds the follower or following :class:`User`,
          and new str query param values for ``before`` and ``after`` to fetch
          the previous and next pages, respectively
        """
        assert collection in ('followers', 'following'), collection

        filter_prop = Follower.to if collection == 'followers' else Follower.from_
        query = Follower.query(
            Follower.status == 'active',
            filter_prop == user.key,
        )

        followers, before, after = fetch_page(query, Follower, by=Follower.updated)
        users = ndb.get_multi(f.from_ if collection == 'followers' else f.to
                              for f in followers)
        User.load_multi(u for u in users if u)

        for f, u in zip(followers, users):
            f.user = u
        # get_multi returns None for users that don't exist, so check for that
        # before looking at their status
        followers = [f for f in followers if f.user and not f.user.status]

        return followers, before, after
1497

1498

1499
def fetch_objects(query, by=None, user=None):
    """Fetches a page of :class:`Object` entities from a datastore query.

    Wraps :func:`fetch_page` and adds attributes to the returned
    :class:`Object` entities for rendering in ``objects.html``: ``phrase``,
    ``content``, and ``url``. Non-public and deleted objects are left out.

    Args:
      query (ndb.Query)
      by (ndb.model.Property): either :attr:`Object.updated` or
        :attr:`Object.created`
      user (User): current user, optional

    Returns:
      (list of Object, str, str) tuple:
      (results, new ``before`` query param, new ``after`` query param)
      to fetch the previous and next pages, respectively
    """
    assert by is Object.updated or by is Object.created
    objects, new_before, new_after = fetch_page(query, Object, by=by)
    objects = [o for o in objects if as1.is_public(o.as1) and not o.deleted]

    # AS1 verb => human-readable phrase
    phrases = {
        'accept': 'accepted',
        'article': 'posted',
        'comment': 'replied',
        'delete': 'deleted',
        'follow': 'followed',
        'invite': 'is invited to',
        'issue': 'filed issue',
        'like': 'liked',
        'note': 'posted',
        'post': 'posted',
        'repost': 'reposted',
        'rsvp-interested': 'is interested in',
        'rsvp-maybe': 'might attend',
        'rsvp-no': 'is not attending',
        'rsvp-yes': 'is attending',
        'share': 'reposted',
        'stop-following': 'unfollowed',
        'undo': 'undid',
        'update': 'updated',
    }

    # synthesize human-friendly content for objects
    for obj in objects:
        obj_as1 = obj.as1
        inner_obj = as1.get_object(obj_as1)

        # synthesize text snippet. for posts, prefer the inner object's type
        obj_type = as1.object_type(obj_as1)
        if obj_type == 'post':
            inner_type = inner_obj.get('objectType')
            if inner_type:
                obj_type = inner_type

        obj.phrase = phrases.get(obj_type)

        content = (inner_obj.get('content')
                   or inner_obj.get('displayName')
                   or inner_obj.get('summary'))
        if content:
            content = util.parse_html(content).get_text()

        urls = as1.object_urls(inner_obj)
        inner_id = unwrap(inner_obj.get('id', ''))
        url = urls[0] if urls else inner_id

        # user is optional, so only ask it about the id if we have one
        is_profile_update = obj_type == 'update' and (
            (obj.users and ((user and user.is_web_url(inner_id))
                            or inner_id.strip('/') == obj.users[0].id()))
            or (obj.domains
                and inner_id.strip('/') == f'https://{obj.domains[0]}'))

        if is_profile_update:
            obj.phrase = 'updated'
            obj_as1.update({
                'content': 'their profile',
                'url': inner_id,
            })
        elif url and not content:
            # heuristics for sniffing URLs and converting them to more friendly
            # phrases and user handles.
            # TODO: standardize this into granary.as2 somewhere?
            from activitypub import FEDI_URL_RE
            from atproto import COLLECTION_TO_TYPE, did_to_handle

            handle = suffix = ''
            if match := FEDI_URL_RE.match(url):
                handle = match.group(2)
                if match.group(4):
                    suffix = "'s post"
            elif match := BSKY_APP_URL_RE.match(url):
                handle = match.group('id')
                if match.group('tid'):
                    suffix = "'s post"
            elif match := AT_URI_PATTERN.match(url):
                handle = match.group('repo')
                if coll := match.group('collection'):
                    suffix = f"'s {COLLECTION_TO_TYPE.get(coll) or 'post'}"
                url = bluesky.at_uri_to_web_url(url)
            elif url.startswith('did:'):
                handle = url
                url = bluesky.Bluesky.user_url(handle)

            if handle:
                if handle.startswith('did:'):
                    handle = did_to_handle(handle) or handle
                content = f'@{handle}{suffix}'

            if url:
                content = common.pretty_link(url, text=content, user=user)

        obj.content = (obj_as1.get('content')
                       or obj_as1.get('displayName')
                       or obj_as1.get('summary'))
        obj.url = util.get_first(obj_as1, 'url')

        if obj_type in ('like', 'follow', 'repost', 'share') or not obj.content:
            if obj.url:
                obj.phrase = common.pretty_link(
                    obj.url, text=obj.phrase, attrs={'class': 'u-url'}, user=user)
            if content:
                obj.content = content
                obj.url = url

    return objects, new_before, new_after
1621

1622

1623
def fetch_page(query, model_class, by=None):
    """Fetches a page of results from a datastore query.

    Uses the ``before`` and ``after`` query params (if provided; should be
    ISO8601 timestamps) and the ``by`` property to identify the page to fetch.
    At most one of ``before`` and ``after`` may be provided.

    The returned page is always sorted by each result's ``updated`` property,
    newest first, and the returned paging values are taken from ``updated``
    as well.

    Args:
      query (google.cloud.ndb.query.Query)
      model_class (class): not used by this function
      by (ndb.model.Property): paging property, eg :attr:`Object.updated`
        or :attr:`Object.created`. Required, despite the ``None`` default.

    Returns:
      (list of Object or Follower, str, str) tuple: (results, new_before,
      new_after), where new_before and new_after are query param values for
      ``before`` and ``after`` to fetch the previous and next pages,
      respectively. Either may be None.
    """
    assert by

    # if there's a paging param ('before' or 'after'), update query with it
    # TODO: unify this with Bridgy's user page
    def get_paging_param(param):
        """Returns the given query param as a naive UTC datetime, or None."""
        val = request.values.get(param)
        if not val:
            return None

        try:
            # presumably restores a UTC offset's '+' that URL decoding turned
            # into a space
            dt = util.parse_iso8601(val.replace(' ', '+'))
        except Exception as e:
            # Exception, not BaseException, so that KeyboardInterrupt,
            # SystemExit, etc propagate instead of being reported as a bad
            # query param. error() is expected to abort the request, ie not
            # return; the code below depends on dt being set.
            error(f"Couldn't parse {param}, {val!r} as ISO8601: {e}")

        # normalize timezone-aware values to naive UTC
        if dt.tzinfo:
            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
        return dt

    before = get_paging_param('before')
    after = get_paging_param('after')
    if before and after:
        error("can't handle both before and after")
    elif after:
        # ascending, so that we get the results closest to after
        query = query.filter(by >= after).order(by)
    elif before:
        query = query.filter(by < before).order(-by)
    else:
        query = query.order(-by)

    query_iter = query.iter()
    # NOTE(review): this sorts on updated even when by is a different
    # property, eg created. Looks intentional, but confirm against callers.
    results = sorted(itertools.islice(query_iter, 0, PAGE_SIZE),
                     key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    has_next = results and query_iter.probably_has_next()
    new_after = (
        before if before
        else results[0].updated if has_next and after
        else None)
    if new_after:
        new_after = new_after.isoformat()

    new_before = (
        after if after else
        results[-1].updated if has_next
        else None)
    if new_before:
        new_before = new_before.isoformat()

    return results, new_before, new_after
1✔
1691

1692

1693
@lru_cache(maxsize=100000)
@memcache.memoize(expire=GET_ORIGINALS_CACHE_EXPIRATION)
def get_original_object_key(copy_id):
    """Looks up the key of the :class:`Object` that has a given copy id.

    Queries for an :class:`Object` whose ``copies`` include a target with
    this URI. Results are cached by both :func:`functools.lru_cache` and
    :func:`memcache.memoize`.

    Note that :meth:`Object.add` also updates this function's
    :func:`memcache.memoize` cache.

    Args:
      copy_id (str): must be non-empty

    Returns:
      google.cloud.ndb.Key or None: the matching object's key, if any
    """
    assert copy_id

    by_copy = Object.query(Object.copies.uri == copy_id)
    return by_copy.get(keys_only=True)
1✔
1710

1711

1712
@lru_cache(maxsize=100000)
@memcache.memoize(expire=GET_ORIGINALS_CACHE_EXPIRATION)
def get_original_user_key(copy_id):
    """Looks up the key of the user that has a given copy id.

    Checks each protocol in ``PROTOCOLS`` in turn, skipping empty entries,
    the ``ui`` protocol, and any protocol that owns ``copy_id`` itself, and
    returns the first user found whose ``copies`` include this URI.

    Note that :meth:`User.add` also updates this function's
    :func:`memcache.memoize` cache.

    Args:
      copy_id (str): must be non-empty

    Returns:
      google.cloud.ndb.Key or None: the matching user's key, if any
    """
    assert copy_id

    for proto in PROTOCOLS.values():
        if not proto or proto.LABEL == 'ui' or proto.owns_id(copy_id):
            continue

        found = proto.query(proto.copies.uri == copy_id).get(keys_only=True)
        if found:
            return found

    return None
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc