• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy / 0b57fc0a-1e1d-43d1-8635-424b087c5161

02 Mar 2026 03:28PM UTC coverage: 92.338% (+0.4%) from 91.96%
0b57fc0a-1e1d-43d1-8635-424b087c5161

push

circleci

snarfed
deploy.sh: don't run datastore emulator, it's always running locally now

3808 of 4124 relevant lines covered (92.34%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.18
/models.py
1
"""Datastore model classes."""
2
from datetime import datetime, timedelta, timezone
1✔
3
import logging
1✔
4
import os
1✔
5
import re
1✔
6

7
from google.cloud import ndb
1✔
8
from granary import as1
1✔
9
from granary import microformats2
1✔
10
from granary import source as gr_source
1✔
11
from oauth_dropins import bluesky as oauth_bluesky
1✔
12
from oauth_dropins.indieauth import IndieAuth
1✔
13
from oauth_dropins.instagram import INSTAGRAM_SESSIONID_COOKIE
1✔
14
from requests_oauth2client import OAuth2AccessTokenAuth, TokenSerializer
1✔
15
from oauth_dropins.webutil import webmention
1✔
16
from oauth_dropins.webutil.flask_util import flash
1✔
17
from oauth_dropins.webutil.models import StringIdModel
1✔
18
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
19
import requests
1✔
20

21
import superfeedr
1✔
22
import util
1✔
23

24
logger = logging.getLogger(__name__)
1✔
25

26
VERB_TYPES = ('post', 'comment', 'like', 'react', 'repost', 'rsvp', 'tag')
1✔
27
PUBLISH_TYPES = VERB_TYPES + ('preview', 'delete')
1✔
28

29
MAX_AUTHOR_URLS = 5
1✔
30

31
REFETCH_HFEED_TRIGGER = datetime.fromtimestamp(-1, tz=timezone.utc)
1✔
32

33
# limit size of block lists stored in source entities to try to keep whole
34
# entity under 1MB datastore limit:
35
# https://cloud.google.com/datastore/docs/concepts/limits
36
BLOCKLIST_MAX_IDS = 20000
1✔
37

38
# maps string short name to Source subclass. populated by SourceMeta.
39
sources = {}
1✔
40

41

42
def get_type(obj):
  """Returns the :class:`Response` or :class:`Publish` type for an AS object."""
  obj_type = obj.get('objectType')
  verb = obj.get('verb')

  if obj_type == 'activity' and verb == 'share':
    return 'repost'
  if obj_type == 'issue':
    return 'post'
  if verb in as1.RSVP_VERB_TO_COLLECTION:
    return 'rsvp'

  # a reply: either declared as a comment, or anything with an inReplyTo on
  # itself or on one of its object/context children
  if (obj_type == 'comment' or obj.get('inReplyTo')
      or any(child.get('inReplyTo') for child in
             util.get_list(obj, 'object') + util.get_list(obj, 'context'))):
    return 'comment'

  if verb in VERB_TYPES:
    return verb

  return 'post'
60

61

62
class DisableSource(Exception):
  """Raised when a user has deauthorized our app inside a given platform."""
64

65

66
class SourceMeta(ndb.MetaModel):
  """:class:`Source` metaclass. Registers all subclasses in the ``sources`` global."""
  def __new__(meta, name, bases, class_dict):
    new_cls = ndb.MetaModel.__new__(meta, name, bases, class_dict)
    # abstract bases leave SHORT_NAME unset; only register concrete silo classes
    if new_cls.SHORT_NAME:
      sources[new_cls.SHORT_NAME] = new_cls
    return new_cls
73

74

75
class Source(StringIdModel, metaclass=SourceMeta):
  """A silo account, e.g. a Facebook or Google+ account.

  Each concrete silo class should subclass this class.
  """
  STATUSES = ('enabled', 'disabled')
  POLL_STATUSES = ('ok', 'error', 'polling')

  # short name for this site type. used in URLs, etc.
  SHORT_NAME = None
  # the corresponding granary class
  GR_CLASS = None
  # oauth-dropins Start class
  OAUTH_START = None
  # oauth-dropins datastore model class
  AUTH_MODEL = None
  # whether Bridgy supports listen for this silo
  CAN_LISTEN = True
  # whether Bridgy supports publish for this silo
  CAN_PUBLISH = None
  # string name of oauth-dropins auth entity property to use as Micropub token
  MICROPUB_TOKEN_PROPERTY = None
  # whether this source should poll automatically, or only when triggered
  # (eg Instagram)
  AUTO_POLL = True
  # poll frequencies, from high-volume users down to dormant ones
  VOLUME_POLL = timedelta(minutes=5)
  FAST_POLL = timedelta(minutes=30)
  # how often to poll sources that have never sent a webmention
  SLOW_POLL = timedelta(days=1)
  # how often to poll sources that are currently rate limited by their silo
  RATE_LIMITED_POLL = SLOW_POLL
  # how long to wait after signup for a successful webmention before dropping
  # to the lower frequency poll
  FAST_POLL_GRACE_PERIOD = timedelta(days=7)
  # how often refetch author url to look for updated syndication links
  FAST_REFETCH = timedelta(hours=6)
  # refetch less often (this often) if it's been >2w since the last synd link
  SLOW_REFETCH = timedelta(days=2)
  # rate limiting HTTP status codes returned by this silo. e.g. twitter
  # returns 429, instagram 503, google+ 403.
  RATE_LIMIT_HTTP_CODES = ('429',)
  DISABLE_HTTP_CODES = ('401',)
  TRANSIENT_ERROR_HTTP_CODES = ()
  # whether granary supports fetching block lists
  HAS_BLOCKS = False
  # whether to require a u-syndication link for backfeed
  BACKFEED_REQUIRES_SYNDICATION_LINK = False
  # ignore fragments when comparing syndication links in OPD
  IGNORE_SYNDICATION_LINK_FRAGMENTS = False
  # convert username to all lower case to use as key name
  USERNAME_KEY_ID = False

  # maps Publish.type (e.g. 'like') to source-specific human readable type
  # label (e.g. 'favorite'). subclasses should override this.
  TYPE_LABELS = {}

  # subclasses should override this
  URL_CANONICALIZER = util.UrlCanonicalizer()

  # regexps for URL paths that don't accept incoming webmentions. currently
  # used by Blogger.
  PATH_BLOCKLIST = ()

  created = ndb.DateTimeProperty(auto_now_add=True, required=True, tzinfo=timezone.utc)
  url = ndb.StringProperty()
  username = ndb.StringProperty()
  status = ndb.StringProperty(choices=STATUSES, default='enabled')
  poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok')
  rate_limited = ndb.BooleanProperty(default=False)
  name = ndb.StringProperty()  # full human-readable name
  picture = ndb.StringProperty()
  domains = ndb.StringProperty(repeated=True)
  domain_urls = ndb.StringProperty(repeated=True)
  features = ndb.StringProperty(repeated=True, choices=util.FEATURES)
  superfeedr_secret = ndb.StringProperty()
  webmention_endpoint = ndb.StringProperty()

  # points to an oauth-dropins auth entity. the model class should be a
  # subclass of oauth_dropins.BaseAuth. the token should be generated with the
  # offline_access scope so that it doesn't expire.
  auth_entity = ndb.KeyProperty()

  #
  # listen-only properties
  #
  last_polled = ndb.DateTimeProperty(default=util.EPOCH, tzinfo=timezone.utc)
  last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH, tzinfo=timezone.utc)
  last_webmention_sent = ndb.DateTimeProperty(tzinfo=timezone.utc)
  last_public_post = ndb.DateTimeProperty(tzinfo=timezone.utc)
  recent_private_posts = ndb.IntegerProperty(default=0)

  # the last time we re-fetched the author's url looking for updated
  # syndication links
  last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH, tzinfo=timezone.utc)

  # the last time we've seen a rel=syndication link for this Source. we won't
  # spend the time to re-fetch and look for updates if there's never been one
  last_syndication_url = ndb.DateTimeProperty(tzinfo=timezone.utc)
  # the last time we saw a syndication link in an h-feed, as opposed to just
  # on permalinks. background: https://github.com/snarfed/bridgy/issues/624
  last_feed_syndication_url = ndb.DateTimeProperty(tzinfo=timezone.utc)

  last_activity_id = ndb.StringProperty()
  last_activities_etag = ndb.StringProperty()
  last_activities_cache_json = ndb.TextProperty()
  seen_responses_cache_json = ndb.TextProperty(compressed=True)

  # populated in Poll.poll(), used by handlers
  blocked_ids = ndb.JsonProperty(compressed=True)

  # maps updated property names to values that put_updates() writes back to
  # the datastore transactionally. set this to {} before beginning.
  updates = None

  # gr_source is *not* set to None by default here, since it needs to be unset
  # for __getattr__ to run when it's accessed.
193

194
  def __init__(self, *args, id=None, **kwargs):
    """Constructor. Escapes the key string id if it starts with ``__``."""
    if not id and self.USERNAME_KEY_ID:
      # key off the lower-cased username when this silo says usernames are ids
      username = kwargs.get('username')
      if username:
        id = username.lower()
    if id and id.startswith('__'):
      # prefix with a backslash so user-provided ids can't collide with
      # ndb's reserved __*__ key namespace
      id = '\\' + id
    super().__init__(*args, id=id, **kwargs)
202

203
  def key_id(self):
    """Returns the key's unescaped string id."""
    raw = self.key.id()
    # drop the leading backslash the constructor may have added
    return raw[1:] if raw[0] == '\\' else raw
207

208
  @classmethod
  def new(cls, **kwargs):
    """Factory method. Creates and returns a new instance for the current user.

    To be implemented by subclasses.
    """
    raise NotImplementedError()
215

216
  def __getattr__(self, name):
    """Lazily load the auth entity and instantiate :attr:`self.gr_source`.

    Once :attr:`self.gr_source` is set, this method will *not* be called;
    :attr:`gr_source` will be returned normally.

    Args:
      name (str): attribute name being looked up

    Returns:
      the granary source instance for ``gr_source``, or None if there's no
      auth entity; delegates everything else to the superclass
    """
    if name != 'gr_source':
      return getattr(super(), name)

    super_attr = getattr(super(), name, None)
    if super_attr:
      return super_attr
    elif not self.auth_entity:
      return None

    auth_entity = self.auth_entity.get()
    try:
      # silos whose auth entities hold a refresh token construct from it alone
      refresh_token = auth_entity.refresh_token
      self.gr_source = self.GR_CLASS(refresh_token)
      return self.gr_source
    except AttributeError:
      logger.info('no refresh_token')

    args = auth_entity.access_token()
    if not isinstance(args, tuple):
      args = (args,)

    # per-silo constructor arguments
    kwargs = {}
    if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user':
      kwargs = {'user_id': self.key_id()}

    elif self.key.kind() == 'Instagram':
      kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE}

    elif self.key.kind() == 'Mastodon':
      args = (auth_entity.instance(),) + args
      inst = auth_entity.app.get().instance_info
      if inst:
        j = json_loads(inst)
        truncate_text_length = (
          j.get("configuration", {}).get('statuses', {}).get('max_characters', None)
          or j.get('max_toot_chars', None))
      else:
        truncate_text_length = None
      kwargs = {
        'user_id': json_loads(auth_entity.user_json).get('id'),
        # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance
        'truncate_text_length': truncate_text_length,
      }

    elif self.key.kind() == 'Twitter':
      kwargs = {'username': self.key_id()}

    elif self.key.kind() == 'Bluesky':
      did = auth_entity.key.id()

      def session_callback(auth_or_session):
        """Persists refreshed Bluesky credentials back onto the auth entity."""
        logger.info(f'Storing Bluesky creds for {did}: {auth_or_session}')
        if isinstance(auth_or_session, dict):
          auth_entity.session = auth_or_session
        else:
          # bug fix: this referenced the undefined name ``auth_obj``, which
          # raised NameError whenever an OAuth token was refreshed
          auth_entity.dpop_token = TokenSerializer().dumps(auth_or_session.token)
        auth_entity.put()

      args = []
      kwargs = {
        'handle': json_loads(auth_entity.user_json).get('handle'),
        'did': did,
        'session_callback': session_callback,
      }

      if auth_entity.dpop_token:
        # OAuth
        pds_url = auth_entity.pds_url or oauth_bluesky.pds_for_did(did)
        oauth_client = oauth_bluesky.oauth_client_for_pds(
          util.bluesky_oauth_client_metadata(), pds_url)
        token = TokenSerializer().loads(auth_entity.dpop_token)
        kwargs.update({
          'auth': OAuth2AccessTokenAuth(client=oauth_client, token=token),
          'pds_url': pds_url,
        })
      else:
        # app password based access token
        kwargs.update({
          'access_token': auth_entity.session.get('accessJwt'),
          'refresh_token': auth_entity.session.get('refreshJwt'),
        })

    self.gr_source = self.GR_CLASS(*args, **kwargs)
    return self.gr_source
303

304
  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id.

    By default, interprets id as just the key id. Subclasses may extend this to
    support usernames, etc.

    Ideally, if ``USERNAME_KEY_ID``, normalize to lower case before looking up.
    We'd need to backfill all existing entities with upper case key ids, though,
    which we're not planning to do. https://github.com/snarfed/bridgy/issues/884
    """
    # apply the same __-escaping the constructor does
    key_name = '\\' + id if id and id.startswith('__') else id
    return ndb.Key(cls, key_name).get()
318

319
  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. ``tag:plus.google.com:123456``."""
    key_id = self.key_id()
    return self.gr_source.tag_uri(key_id)
322

323
  def bridgy_path(self):
    """Returns the Bridgy page URL path for this source."""
    return f'/{self.SHORT_NAME}/{self.key_id()}'
326

327
  def bridgy_url(self):
    """Returns the Bridgy page URL for this source."""
    path = self.bridgy_path()
    return util.host_url(path)
330

331
  def silo_url(self, handler):
    """Returns the silo account URL, e.g. https://twitter.com/foo."""
    raise NotImplementedError()
334

335
  def label(self):
    """Human-readable label for this source."""
    silo = self.GR_CLASS.NAME
    return f'{self.label_name()} ({silo})'
338

339
  def label_name(self):
    """Human-readable name or username for this source, whichever is preferred."""
    # prefer the full name; fall back to the key id
    if self.name:
      return self.name
    return self.key_id()
342

343
  def post_id(self, url):
    """Resolve the ID of a post from a URL.

    By default calls out to Granary's classmethod but can be overridden if a
    URL needs user-specific treatment.
    """
    return self.gr_source.post_id(url)
350

351
  @classmethod
  @ndb.transactional()
  def put_updates(cls, source):
    """Writes ``source.updates`` to the datastore transactionally.

    Args:
      source (Source): entity whose ``updates`` dict should be applied

    Returns:
      Source: ``source``, updated
    """
    # docstring fix: the original had a duplicated, malformed ``Returns:``
    # section and no ``Args:`` section
    if not source.updates:
      return source

    to_log = {k: v for k, v in source.updates.items() if not k.endswith('_json')}
    logger.info(f'Updating {source.label()} {source.bridgy_path()} : {to_log!r}')

    updates = source.updates
    # re-fetch inside the transaction so the updates apply to the latest
    # stored version of the entity
    source = source.key.get()
    source.updates = updates
    for name, val in updates.items():
      setattr(source, name, val)

    source.put()
    return source
376

377
  def poll_period(self):
    """Returns the poll frequency for this source, as a :class:`datetime.timedelta`.

    Defaults to ~30m, depending on silo. If we've never sent a webmention for
    this source, or the last one we sent was over a month ago, we drop them down
    to ~1d after a week long grace period.
    """
    now = util.now()

    if self.rate_limited:
      return self.RATE_LIMITED_POLL
    if now - self.created < self.FAST_POLL_GRACE_PERIOD:
      # new signups get the fast rate while we wait for their first webmention
      return self.FAST_POLL

    last_sent = self.last_webmention_sent
    if not last_sent:
      return self.SLOW_POLL
    if self.is_volume_user() and last_sent > now - timedelta(hours=1):
      return self.VOLUME_POLL
    if last_sent > now - timedelta(days=7):
      return self.FAST_POLL
    if last_sent > now - timedelta(days=30):
      return self.FAST_POLL * 10
    return self.SLOW_POLL
400

401
  def should_refetch(self):
    """Returns True if we should run OPD refetch on this source now."""
    now = util.now()
    if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
      # sentinel value that forces an immediate refetch
      return True
    if not self.last_syndication_url:
      # never seen a syndication link, so refetching won't find anything
      return False

    recently_syndicated = self.last_syndication_url > now - timedelta(days=14)
    period = self.FAST_REFETCH if recently_syndicated else self.SLOW_REFETCH
    return self.last_poll_attempt >= self.last_hfeed_refetch + period
413

414
  @classmethod
  def bridgy_webmention_endpoint(cls, domain='brid.gy'):
    """Returns the Bridgy webmention endpoint for this source type."""
    return f'https://{domain}/webmention/{cls.SHORT_NAME}'
418

419
  def has_bridgy_webmention_endpoint(self):
    """Returns True if this source uses Bridgy's webmention endpoint."""
    ours = (self.bridgy_webmention_endpoint(),
            self.bridgy_webmention_endpoint(domain='www.brid.gy'))
    return self.webmention_endpoint in ours
424

425
  def get_author_urls(self):
    """Determine the author urls for a particular source.

    In debug mode, replace test domains with localhost.

    Return:
      list of str: URLs, possibly empty
    """
    return [util.replace_test_domains_with_localhost(url)
            for url in self.domain_urls]
434

435
  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    * https://github.com/snarfed/bridgy/issues/456
    * https://github.com/snarfed/bridgy/issues/565

    Returns:
      list of dict: ActivityStreams activities
    """
    # no-op by default; silos that support search override this
    return []
445

446
  def get_activities_response(self, **kwargs):
    """Returns recent posts and embedded comments for this source.

    May be overridden by subclasses.
    """
    kwargs.setdefault('group_id', gr_source.SELF)
    resp = self.gr_source.get_activities_response(**kwargs)

    # Try to make all ids tag URIs
    # https://github.com/snarfed/bridgy/issues/1913
    def tagify_ids(node):
      """Recursively rewrites http(s) ids into tag URIs, in place."""
      if isinstance(node, dict):
        obj_id = node.get('id')
        if obj_id and (obj_id.startswith('http:')
                       or obj_id.startswith('https:')):
          username = node.get('username')
          if as1.object_type(node) in as1.ACTOR_TYPES and username:
            node['id'] = self.gr_source.tag_uri(username)
          else:
            # prefer a numeric id; only derive one from the URL if necessary
            post_id = node.get('numeric_id') or self.post_id(obj_id)
            if post_id:
              node['id'] = self.gr_source.tag_uri(post_id)

        for child in node.values():
          tagify_ids(child)

      elif isinstance(node, (tuple, list, set)):
        for child in node:
          tagify_ids(child)

    for activity in resp['items']:
      tagify_ids(activity)
      self._inject_user_urls(activity)

    return resp
478

479
  def get_activities(self, **kwargs):
    """Returns just the ``items`` from :meth:`get_activities_response`."""
    return self.get_activities_response(**kwargs)['items']
483

484
  def get_comment(self, comment_id, **kwargs):
    """Returns a comment from this source.

    Passes through to granary by default. May be overridden by subclasses.

    Args:
      comment_id (str): site-specific comment id
      kwargs: passed to :meth:`granary.source.Source.get_comment`

    Returns:
      dict: decoded ActivityStreams comment object, or None
    """
    comment = self.gr_source.get_comment(comment_id, **kwargs)
    if not comment:
      return comment
    self._inject_user_urls(comment)
    return comment
500

501
  def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
    """Returns an ActivityStreams ``like`` activity object.

    Passes through to granary by default. May be overridden by subclasses.

    Args:
      activity_user_id (str): id of the user who posted the original activity
      activity_id (str): activity id
      like_user_id (str): id of the user who liked the activity
      kwargs: passed to :meth:`granary.source.Source.get_comment`
    """
    return self.gr_source.get_like(
      activity_user_id, activity_id, like_user_id, **kwargs)
514

515
  def _inject_user_urls(self, activity):
    """Adds this user's web site URLs to their user mentions (in tags), in place."""
    obj = activity.get('object') or activity
    my_tag_id = self.user_tag_id()
    for tag in obj.get('tags', []):
      if tag.get('id') == my_tag_id:
        # fresh dicts per tag so later mutations don't alias across tags
        tag.setdefault('urls', []).extend(
          {'value': u} for u in self.domain_urls)
522

523
  def create_comment(self, post_url, author_name, author_url, content):
    """Creates a new comment in the source silo.

    Must be implemented by subclasses.

    Args:
      post_url (str)
      author_name (str)
      author_url (str)
      content (str)

    Returns:
      dict: response with at least ``id`` field
    """
    raise NotImplementedError()
538

539
  def feed_url(self):
    """Returns the RSS or Atom (or similar) feed URL for this source.

    Must be implemented by subclasses. Currently only implemented by
    :mod:`tumblr` and :mod:`wordpress_rest`.

    Returns:
      str: URL
    """
    raise NotImplementedError()
549

550
  def edit_template_url(self):
    """Returns the URL for editing this blog's template HTML.

    Must be implemented by subclasses. Currently only implemented by
    :mod:`tumblr` and :mod:`wordpress_rest`.

    Returns:
      str: URL
    """
    raise NotImplementedError()
560

561
  def format_for_source_url(self, id):
    """Returns the given id formatted for a URL if necessary.

    Some silos use keys containing slashes. By default this is a no-op -
    can be overridden by subclasses.

    Args:
      id: The id to format

    Returns:
      string formatted id
    """
    return id
573

574
  @classmethod
  def button_html(cls, feature, **kwargs):
    """Returns an HTML string with a login form and button for this site.

    Mostly just passes through to
    :meth:`oauth_dropins.handlers.Start.button_html`.

    Returns:
      str: HTML
    """
    assert set(feature.split(',')) <= set(util.FEATURES)

    form_extra = (kwargs.pop('form_extra', '')
                  + f'<input name="feature" type="hidden" value="{feature}" />')
    source = kwargs.pop('source', None)
    if source:
      form_extra += f'\n<input name="id" type="hidden" value="{source.key_id()}" />'

    if not cls.OAUTH_START:
      return ''

    return cls.OAUTH_START.button_html(
      f'/{cls.SHORT_NAME}/start',
      form_extra=form_extra,
      image_prefix='/oauth_dropins_static/',
      **kwargs)
600

601
  @classmethod
  @ndb.transactional()
  def create_new(cls, user_url=None, **kwargs):
    """Creates and saves a new :class:`Source` and adds a poll task for it.

    Args:
      user_url (str): if provided, supersedes other urls when determining the
        ``author_url``
      kwargs: passed to :meth:`new()`

    Returns:
      Source: newly created entity
    """
    source = cls.new(**kwargs)
    if source is None:
      return None

    auth_entity = kwargs.get('auth_entity')
    # defer to the source if it already set domain_urls
    if not source.domain_urls and auth_entity and hasattr(auth_entity, 'user_json'):
      source.domain_urls, source.domains = source.urls_and_domains(
        auth_entity, user_url)
    logger.debug(f'URLs/domains: {source.domain_urls} {source.domains}')

    # if this source already exists, merge some fields from the stored copy
    existing = source.key.get()
    if existing:
      if not auth_entity or not auth_entity.SCOPES_RESET:
        source.features = set(source.features + existing.features)
      source.populate(**existing.to_dict(include=(
            'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled',
            'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret',
            'webmention_endpoint')))
      verb = 'Updated'
    else:
      verb = 'Added'

    author_urls = source.get_author_urls()
    link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
            if author_urls else 'http://indiewebify.me/#send-webmentions')
    feature = source.features[0] if source.features else 'listen'

    # pick the next-step suggestion for the flash message
    if feature == 'publish':
      suggestion = 'Try previewing a post from your web site!'
    elif feature == 'webmention':
      suggestion = '<a href="%s">Try a webmention!</a>' % link
    else:
      suggestion = "Refresh in a minute to see what we've found!"
    blurb = f'{verb} {source.label()}. {suggestion}'
    logger.info(f'{blurb} {source.bridgy_url()}')

    source.verify()
    if source.verified():
      flash(blurb)

    source.put()

    if 'webmention' in source.features:
      try:
        superfeedr.subscribe(source)
      except BaseException as e:
        code, _ = util.interpret_http_exception(e)
        if (code in superfeedr.TRANSIENT_ERROR_HTTP_CODES or
            util.is_connection_failure(e)):
          flash('Apologies, <a href="https://superfeedr.com/">Superfeedr</a> is having technical difficulties. Please try again later!', escape=False)
          return None
        raise

    if 'listen' in source.features and source.AUTO_POLL:
      # poll once immediately, then again at the normal cadence
      util.add_poll_task(source, now=True)
      util.add_poll_task(source)

    return source
672

673
  def verified(self):
    """Returns True if this source is ready to be used, False otherwise.

    See :meth:`verify()` for details. May be overridden by subclasses, e.g.
    :class:`tumblr.Tumblr`.
    """
    if not self.domains or not self.domain_urls:
      return False
    if 'webmention' in self.features and not self.webmention_endpoint:
      return False
    if ('listen' in self.features
        and not (self.webmention_endpoint or self.last_webmention_sent)):
      return False
    return True
687

688
  def verify(self, force=False):
    """Checks that this source is ready to be used.

    For blog and listen sources, this fetches their front page HTML and
    discovers their webmention endpoint. For publish sources, this checks that
    they have a domain.

    May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

    Args:
      force (bool): if True, fully verifies (e.g. re-fetches the blog's HTML and
        performs webmention discovery) even we already think this source is
        verified.
    """
    author_urls = [
      url for url, domain in zip(self.get_author_urls(), self.domains)
      if not util.in_webmention_blocklist(domain)]
    if ((self.verified() and not force) or self.status == 'disabled'
        or not self.features or not author_urls):
      return

    first_url = author_urls[0]
    try:
      discovered = webmention.discover(first_url, timeout=util.HTTP_TIMEOUT)
      self.webmention_endpoint = discovered.endpoint
      self._fetched_html = discovered.response.text
    except BaseException as e:
      logger.info('Error discovering webmention endpoint', exc_info=e)
      self.webmention_endpoint = None

    self.put()
718

719
  def urls_and_domains(self, auth_entity, user_url, actor=None,
                       resolve_source_domain=True):
    """Returns this user's valid (not webmention-blocklisted) URLs and domains.

    Converts the auth entity's ``user_json`` to an ActivityStreams actor and
    uses its ``urls`` and ``url`` fields. May be overridden by subclasses.

    Args:
      auth_entity (oauth_dropins.models.BaseAuth)
      user_url (str): optional URL passed in when authorizing
      actor (dict): optional AS actor for the user. If provided, overrides
        auth_entity
      resolve_source_domain (bool): whether to follow redirects on URLs on
        this source's domain

    Returns:
      ([str url, ...], [str domain, ...]) tuple:
    """
    if not actor:
      actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json))
    logger.debug(f'Extracting URLs and domains from actor: {json_dumps(actor, indent=2)}')

    candidates = util.trim_nulls(util.uniquify(
        [user_url] + as1.object_urls(actor)))
    if len(candidates) > MAX_AUTHOR_URLS:
      logger.info(f'Too many profile links! Only resolving the first {MAX_AUTHOR_URLS}: {candidates}')

    resolved_urls = []
    for i, candidate in enumerate(candidates):
      on_source_domain = (util.domain_from_link(candidate)
                          == self.gr_source.DOMAIN)
      # don't resolve past the cap, or on this silo's domain when disabled
      should_resolve = (i < MAX_AUTHOR_URLS and
                        (resolve_source_domain or not on_source_domain))
      resolved = self.resolve_profile_url(candidate, resolve=should_resolve)
      if resolved:
        resolved_urls.append(resolved)

    final_urls = []
    domains = []
    for url in util.dedupe_urls(resolved_urls):  # normalizes domains to lower case
      # skip links on this source's domain itself. only currently needed for
      # Mastodon; the other silo domains are in the webmention blocklist.
      domain = util.domain_from_link(url)
      if domain != self.gr_source.DOMAIN:
        final_urls.append(url)
        domains.append(domain)

    return final_urls, domains
767

768
  @staticmethod
  def resolve_profile_url(url, resolve=True):
    """Resolves a profile URL to be added to a source.

    Args:
      url (str)
      resolve (bool): whether to make HTTP requests to follow redirects, etc.

    Returns:
      str: resolved URL, or None
    """
    final, _, ok = util.get_webmention_target(url, resolve=resolve)
    if not ok:
      return None

    final = final.lower()
    if util.schemeless(final).startswith(util.schemeless(url.lower())):
      # redirected to a deeper path. use the original higher level URL. #652
      final = url

    # if final has a path segment, check whether the root has a matching rel=me
    root_match = re.match(r'^(https?://[^/]+)/.+', final)
    if root_match and resolve:
      root = root_match.group(1)
      try:
        rels = util.fetch_mf2(root)['rels']
        if final in rels.get('me', []):
          final = root
      except requests.RequestException:
        logger.warning(f"Couldn't fetch {root}, preserving path in {final}",
                       exc_info=True)

    return final
801

802
  def canonicalize_url(self, url, activity=None, **kwargs):
    """Canonicalizes a post or object URL.

    Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`. Returns the
    URL unchanged when no canonicalizer is configured. The ``activity`` arg
    is unused here; presumably subclasses override and use it — confirm.
    """
    canonicalizer = self.URL_CANONICALIZER
    if not canonicalizer:
      return url
    return canonicalizer(url, **kwargs)
808

809
  def infer_profile_url(self, url):
    """Given a silo profile, tries to find the matching Bridgy user URL.

    Queries Bridgy's registered accounts for users with a particular
    domain in their silo profile.

    Args:
      url (str): a person's URL

    Return:
      str: URL for their profile on this service, or None
    """
    domain = util.domain_from_link(url)
    # already a URL on this silo; nothing to infer
    if domain == self.gr_source.DOMAIN:
      return url

    cls = self.__class__
    user = cls.query(cls.domains == domain).get()
    return self.gr_source.user_url(user.key_id()) if user else None
828

829
  def preprocess_for_publish(self, obj):
    """Preprocess an object before trying to publish it.

    By default this tries to massage person tags so that the tag's
    ``url`` points to the person's profile on this service (as opposed
    to a person's homepage).

    The object is modified in place.

    Args:
      obj (dict): ActivityStreams activity or object
    """
    # plain strings have nothing to massage
    if isinstance(obj, str):
      return obj

    for tag in obj.get('tags', []):
      if tag.get('objectType') != 'person':
        continue
      # first profile URL we can infer on this silo wins
      silo_url = next(
        (inferred for inferred in
         (self.infer_profile_url(u) for u in as1.object_urls(tag) if u)
         if inferred),
        None)
      if silo_url:
        tag['url'] = silo_url

    # recurse on contained object(s)
    for inner in util.get_list(obj, 'object'):
      self.preprocess_for_publish(inner)
857

858
  def on_new_syndicated_post(self, syndpost):
    """Called when a new :class:`SyndicatedPost` is stored for this source.

    No-op by default; subclasses may override to react to the new entity.

    Args:
      syndpost (SyndicatedPost)
    """
    pass
865

866
  def is_private(self) -> bool:
    """Returns True if this source is private aka protected.

    ...ie their posts are not public.

    Always False here; subclasses with protected accounts are expected to
    override.
    """
    return False
872

873
  def is_beta_user(self):
    """Returns True if this is a "beta" user opted into new features.

    Beta users come from ``beta_users.txt``.
    """
    path = self.bridgy_path()
    return path in util.BETA_USER_PATHS
879

880
  def is_volume_user(self):
    """Returns True if this is a "volume" user special cased to poll faster.

    Volume users come from ``volume_users.txt``.
    """
    path = self.bridgy_path()
    return path in util.VOLUME_USER_PATHS
886

887
  def load_blocklist(self):
    """Fetches this user's blocklist, if supported, and stores it in the entity."""
    if not self.HAS_BLOCKS:
      return

    try:
      blocked = self.gr_source.get_blocklist_ids()
    except gr_source.RateLimited as e:
      # keep whatever partial results the silo gave us before rate limiting
      blocked = e.partial or []

    # cap the stored list to keep the whole entity under the datastore limit
    self.blocked_ids = blocked[:BLOCKLIST_MAX_IDS]
    self.put()
899

900
  def is_blocked(self, obj):
    """Returns True if an object's author is being blocked.

    ...ie they're in this user's block list.

    Note that this method is tested in test_twitter.py, not test_models.py, for
    historical reasons.

    Args:
      obj (dict): ActivityStreams activity or object

    Returns:
      bool: whether the author or actor of obj (or of a contained object) is
      in :attr:`blocked_ids`. (Previously this returned None implicitly when
      nothing matched; now it consistently returns a bool. Truthiness is
      unchanged for callers.)
    """
    if not self.blocked_ids:
      return False

    # check the object itself plus any contained object(s)
    return any(o.get(field, {}).get('numeric_id') in self.blocked_ids
               for o in [obj] + util.get_list(obj, 'object')
               for field in ('author', 'actor'))
915

916

917
class Webmentions(StringIdModel):
  """A bundle of links to send webmentions for.

  Use the :class:`Response` and :class:`BlogPost` concrete subclasses below.
  """
  STATUSES = ('new', 'processing', 'complete', 'error')

  # key of the Source entity this bundle belongs to
  source = ndb.KeyProperty()
  status = ndb.StringProperty(choices=STATUSES, default='new')
  # NOTE(review): presumably the lease expiry for an in-flight propagate task;
  # confirm against the task handlers
  leased_until = ndb.DateTimeProperty(tzinfo=timezone.utc)
  created = ndb.DateTimeProperty(auto_now_add=True, tzinfo=timezone.utc)
  updated = ndb.DateTimeProperty(auto_now=True, tzinfo=timezone.utc)

  # Original post links, ie webmention targets, bucketed by delivery status
  sent = ndb.StringProperty(repeated=True)
  unsent = ndb.StringProperty(repeated=True)
  error = ndb.StringProperty(repeated=True)
  failed = ndb.StringProperty(repeated=True)
  skipped = ndb.StringProperty(repeated=True)

  def label(self):
    """Returns a human-readable string description for use in log messages.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  def add_task(self):
    """Adds a propagate task for this entity.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  @ndb.transactional()
  def get_or_save(self):
    """Stores this entity, merging target URLs into any existing copy.

    If an entity with the same key already exists, each status bucket's URLs
    from self are merged into it, skipping URLs already present in *any* of
    the existing entity's buckets. A propagate task is added when new unsent
    or error targets appear, or on first save when self has any such targets;
    otherwise a brand-new entity is marked complete immediately.

    Returns:
      Webmentions: the stored entity (the pre-existing one if found, else self)
    """
    entity = existing = self.key.get()

    propagate = False
    if entity:
      # merge targets
      urls = set(entity.sent + entity.unsent + entity.error +
                 entity.failed + entity.skipped)
      for field in ('sent', 'unsent', 'error', 'failed', 'skipped'):
        entity_urls = getattr(entity, field)
        # NOTE(review): new_urls is a set, so the order the new URLs are
        # appended in is arbitrary
        new_urls = set(getattr(self, field)) - urls
        entity_urls += new_urls
        if new_urls and field in ('unsent', 'error'):
          propagate = True
    else:
      entity = self
      propagate = self.unsent or self.error

    if propagate:
      logger.debug(f'New webmentions to propagate! {entity.label()}')
      entity.add_task()
    elif not existing:
      entity.status = 'complete'

    entity.put()
    return entity

  def restart(self):
    """Moves status and targets to 'new' and adds a propagate task."""
    self.status = 'new'
    # everything becomes unsent again, deduped across all buckets
    self.unsent = util.dedupe_urls(self.unsent + self.sent + self.error +
                                   self.failed + self.skipped)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    with util.webmention_endpoint_cache_lock:
      for url in self.unsent:
        util.webmention_endpoint_cache.pop(util.webmention_endpoint_cache_key(url), None)

    # this datastore put and task add should be transactional, but Cloud Tasks
    # doesn't support that :(
    # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
    # https://github.com/googleapis/python-tasks/issues/26
    #
    # The new "bundled services" bridge for the old App Engine APIs still
    # supports them, but only because that's literally on the old backends,
    # which seems like a dead end.
    # https://groups.google.com/g/google-appengine/c/22BKInlWty0/m/05ObNEdsAgAJ
    self.put()
    self.add_task()
1002

1003

1004
class Response(Webmentions):
  """A comment, like, or repost to be propagated.

  The key name is the comment object id as a tag URI.
  """
  # ActivityStreams JSON activity and comment, like, or repost
  type = ndb.StringProperty(choices=VERB_TYPES, default='comment')
  # These are TextProperty, and not JsonProperty, so that their plain text is
  # visible in the App Engine admin console. (JsonProperty uses a blob. :/)
  activities_json = ndb.TextProperty(repeated=True)
  response_json = ndb.TextProperty()
  # Old values for response_json. Populated when the silo reports that the
  # response has changed, e.g. the user edited a comment or changed their RSVP
  # to an event. Currently unused, kept for historical records only.
  old_response_jsons = ndb.TextProperty(repeated=True)
  # JSON dict mapping original post url to activity index in activities_json.
  urls_to_activity = ndb.TextProperty()
  # Original post links found by original post discovery
  original_posts = ndb.StringProperty(repeated=True)

  def label(self):
    """Returns a human-readable description: kind, type, key id, and URL."""
    return ' '.join((self.key.kind(), self.type, self.key.id(),
                     json_loads(self.response_json).get('url', '[no url]')))

  def add_task(self):
    """Adds a propagate task for this response."""
    util.add_propagate_task(self)

  @staticmethod
  def get_type(obj):
    """Returns the response type for an AS object, defaulting to 'comment'."""
    type = get_type(obj)
    return type if type in VERB_TYPES else 'comment'

  def get_or_save(self, source, restart=False):
    """Stores this response, merging into any existing copy.

    Extends :meth:`Webmentions.get_or_save`. If the stored response's type or
    content has changed, archives the old ``response_json``, merges in-reply-tos,
    re-merges ``activities_json``/``urls_to_activity``, and re-propagates. If
    nothing changed and ``restart`` is False, returns the stored entity as is.

    Args:
      source (Source)
      restart (bool): whether to re-propagate even when unchanged

    Returns:
      Response: the stored entity
    """
    resp = super().get_or_save()

    if (self.type != resp.type or
        as1.activity_changed(json_loads(resp.response_json),
                             json_loads(self.response_json),
                             log=True)):
      logger.info(f'Response changed! Re-propagating. Original: {resp}')

      # merge response_json; keep up to 10 previous versions for the record
      resp.old_response_jsons = [resp.response_json] + resp.old_response_jsons[:10]

      response_json_to_append = json_loads(self.response_json)
      as1.append_in_reply_to(json_loads(resp.response_json), response_json_to_append)
      self.response_json = json_dumps(util.trim_nulls(response_json_to_append))
      resp.response_json = self.response_json

    elif resp is self or not restart:  # ie it already existed
      return resp

    # merge activities_json, urls_to_activity
    urls_to_full_activities = {}
    for r in self, resp:
      if r.urls_to_activity:
        urls_to_activity = json_loads(r.urls_to_activity)
        for url, index in urls_to_activity.items():
          urls_to_full_activities[url] = r.activities_json[index]
      elif r.activities_json:
        # HACK: we used to not store urls_to_activity when activities_json was only
        # one element. for those Responses, we won't have the target URL here, so
        # just use None
        urls_to_full_activities[None] = r.activities_json[-1]

    # this depends on the fact that dict key and value views have the same matching
    # order, deterministically, since Python 3.7
    resp.urls_to_activity = json_dumps(
      {url: i for i, url in enumerate(urls_to_full_activities.keys())})
    resp.activities_json = list(urls_to_full_activities.values())

    resp.restart(source)
    return resp

  def restart(self, source=None):
    """Moves status and targets to 'new' and adds a propagate task."""
    # add original posts with syndication URLs
    # TODO: unify with Poll.repropagate_old_responses()
    if not source:
      source = self.source.get()

    # collect canonicalized URLs of this response's activities...
    synd_urls = set()
    for activity_json in self.activities_json:
      activity = json_loads(activity_json)
      url = activity.get('url') or activity.get('object', {}).get('url')
      if url:
        url = source.canonicalize_url(url, activity=activity)
        if url:
          synd_urls.add(url)

    # ...then add the original posts those syndication URLs map to
    if synd_urls:
      self.unsent += [synd.original for synd in
                      SyndicatedPost.query(SyndicatedPost.syndication.IN(synd_urls))
                      if synd.original]

    return super().restart()
1100

1101

1102
class Activity(StringIdModel):
  """An activity with responses to be propagated.

  The key name is the activity id as a tag URI.

  Currently only used for posts sent to us by the browser extension.
  """
  # key of the Source entity this activity belongs to
  source = ndb.KeyProperty()
  created = ndb.DateTimeProperty(auto_now_add=True, tzinfo=timezone.utc)
  updated = ndb.DateTimeProperty(auto_now=True, tzinfo=timezone.utc)
  # the activity, as a JSON string
  activity_json = ndb.TextProperty()
  # NOTE(review): presumably the raw HTML the browser extension captured for
  # this activity — confirm against the extension's upload handler
  html = ndb.TextProperty()

1115

1116
class BlogPost(Webmentions):
  """A blog post to be processed for links to send webmentions to.

  The key name is the URL.
  """
  feed_item = ndb.JsonProperty(compressed=True)  # from Superfeedr

  def label(self):
    """Returns a human-readable description: kind, key id, and permalink."""
    permalink = (self.feed_item or {}).get('permalinkUrl')
    return ' '.join((self.key.kind(), self.key.id(), permalink or '[no url]'))

  def add_task(self):
    """Adds a propagate-blogpost task for this entity."""
    util.add_propagate_blogpost_task(self)
1129

1130

1131
class PublishedPage(StringIdModel):
  """Minimal root entity for :class:`Publish` children with the same source URL.

  Key id is the string source URL.
  """
  # intentionally empty: exists only so Publish entities for the same source
  # URL share an ancestor
  pass
1137

1138

1139
class Publish(ndb.Model):
  """A comment, like, repost, or RSVP published into a silo.

  Child of a :class:`PublishedPage` entity.
  """
  STATUSES = ('new', 'complete', 'failed', 'deleted')

  type = ndb.StringProperty(choices=PUBLISH_TYPES)
  status = ndb.StringProperty(choices=STATUSES, default='new')
  # key of the Source entity this was published through
  source = ndb.KeyProperty()
  html = ndb.TextProperty()  # raw HTML fetched from source
  mf2 = ndb.JsonProperty()   # mf2 from micropub request
  # NOTE(review): presumably the silo's response to the publish request —
  # confirm against the publish handlers
  published = ndb.JsonProperty(compressed=True)
  created = ndb.DateTimeProperty(auto_now_add=True, tzinfo=timezone.utc)
  updated = ndb.DateTimeProperty(auto_now=True, tzinfo=timezone.utc)

  def type_label(self):
    """Returns silo-specific string type, e.g. 'favorite' instead of 'like'.

    Looks up this entity's source kind in the global ``sources`` registry and
    maps :attr:`type` through that class's ``TYPE_LABELS``, falling back to
    the raw :attr:`type` when no source class matches.
    """
    for cls in sources.values():  # global
      if cls.__name__ == self.source.kind():
        return cls.TYPE_LABELS.get(self.type, self.type)

    return self.type
1162

1163

1164
class BlogWebmention(Publish, StringIdModel):
  """Datastore entity for webmentions for hosted blog providers.

  Key id is the source URL and target URL concated with a space, ie ``SOURCE
  TARGET``. The source URL is *always* the URL given in the webmention HTTP
  request. If the source page has a ``u-url``, that's stored in the
  :attr:`u_url` property. The target URL is always the final URL, after any
  redirects.

  Reuses :class:`Publish`'s fields, but otherwise unrelated.
  """
  # If the source page has a u-url, it's stored here and overrides the source
  # URL in the key id.
  u_url = ndb.StringProperty()

  # Any initial target URLs that redirected to the final target URL, in redirect
  # order.
  redirected_target_urls = ndb.StringProperty(repeated=True)

  def source_url(self):
    """Returns the effective source URL: u-url if set, else from the key id."""
    if self.u_url:
      return self.u_url
    return self.key.id().split()[0]

  def target_url(self):
    """Returns the target URL, ie the second token of the key id."""
    _, target = self.key.id().split()[:2]
    return target
1188

1189

1190
class SyndicatedPost(ndb.Model):
  """Represents a syndicated post and its discovered original (or not
  if we found no original post).  We discover the relationship by
  following rel=syndication links on the author's h-feed.

  See :mod:`original_post_discovery`.

  When a :class:`SyndicatedPost` entity is about to be stored,
  :meth:`source.Source.on_new_syndicated_post` is called before it's stored.
  """

  syndication = ndb.StringProperty()
  original = ndb.StringProperty()
  created = ndb.DateTimeProperty(auto_now_add=True, tzinfo=timezone.utc)
  updated = ndb.DateTimeProperty(auto_now=True, tzinfo=timezone.utc)

  @classmethod
  @ndb.transactional()
  def insert_original_blank(cls, source, original):
    """Insert a new original -> None relationship. Does a check-and-set to
    make sure no previous relationship exists for this original. If
    there is, nothing will be added.

    Args:
      source (Source)
      original (str)
    """
    if cls.query(cls.original == original, ancestor=source.key).get():
      return
    cls(parent=source.key, original=original, syndication=None).put()

  @classmethod
  @ndb.transactional()
  def insert_syndication_blank(cls, source, syndication):
    """Insert a new syndication -> None relationship. Does a check-and-set
    to make sure no previous relationship exists for this
    syndication. If there is, nothing will be added.

    Args:
      source (Source)
      syndication (str)
    """
    if cls.query(cls.syndication == syndication, ancestor=source.key).get():
      return
    cls(parent=source.key, original=None, syndication=syndication).put()

  @classmethod
  @ndb.transactional()
  def insert(cls, source, syndication, original):
    """Insert a new (non-blank) syndication -> original relationship.

    This method does a check-and-set within transaction to avoid
    including duplicate relationships.

    If blank entries exists for the syndication or original URL
    (i.e. syndication -> None or original -> None), they will first be
    removed. If non-blank relationships exist, they will be retained.

    Args:
      source (Source)
      syndication (str)
      original (str)

    Returns:
      SyndicatedPost: newly created or preexisting entity
    """
    # check for an exact match
    duplicate = cls.query(cls.syndication == syndication,
                          cls.original == original,
                          ancestor=source.key).get()
    if duplicate:
      return duplicate

    # delete blanks (expect at most 1 of each).
    # (renamed from `filter`, which shadowed the builtin; the inner loop
    # variable holds ndb *keys*, since this is a keys_only fetch.)
    for blank_filter in (
        ndb.AND(cls.syndication == syndication, cls.original == None),
        ndb.AND(cls.original == original, cls.syndication == None)):
      for key in cls.query(blank_filter, ancestor=source.key).fetch(keys_only=True):
        key.delete()

    r = cls(parent=source.key, original=original, syndication=syndication)
    r.put()
    return r
1273

1274

1275
class Domain(StringIdModel):
  """A domain owned by a user.

  Ownership is proven via IndieAuth. Supports secret tokens associated with each
  domain. Clients can include a token with requests that operate on a given
  domain, eg sending posts and responses from the browser extension.

  Key id is the string domain, eg ``example.com``.
  """
  # secret tokens that authorize operations on this domain
  tokens = ndb.StringProperty(repeated=True)
  # the IndieAuth login that proved ownership of this domain
  auth = ndb.KeyProperty(IndieAuth)
  created = ndb.DateTimeProperty(auto_now_add=True, tzinfo=timezone.utc)
  updated = ndb.DateTimeProperty(auto_now=True, tzinfo=timezone.utc)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc