• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

snarfed / bridgy-fed / dbaf05fd-1c60-4689-995f-78f7c268c56c

13 Sep 2024 08:09PM UTC coverage: 92.787% (-0.05%) from 92.832%
dbaf05fd-1c60-4689-995f-78f7c268c56c

push

circleci

snarfed
stop unwrapping ids etc in ActivityPub.inbox, do it in Object.as1 instead

for #12

2 of 2 new or added lines in 2 files covered. (100.0%)

2 existing lines in 1 file now uncovered.

4155 of 4478 relevant lines covered (92.79%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.98
/web.py
1
"""Webmention protocol with microformats2 in HTML, aka the IndieWeb stack."""
2
from datetime import timedelta, timezone
1✔
3
import difflib
1✔
4
import logging
1✔
5
import re
1✔
6
import statistics
1✔
7
import urllib.parse
1✔
8
from urllib.parse import quote, urlencode, urljoin, urlparse
1✔
9
from xml.etree import ElementTree
1✔
10

11
import brevity
1✔
12
from flask import redirect, render_template, request
1✔
13
from google.cloud import ndb
1✔
14
from google.cloud.ndb import ComputedProperty
1✔
15
from granary import as1, as2, atom, microformats2, rss
1✔
16
import mf2util
1✔
17
from oauth_dropins.webutil import flask_util, util
1✔
18
from oauth_dropins.webutil.appengine_config import tasks_client
1✔
19
from oauth_dropins.webutil import appengine_info
1✔
20
from oauth_dropins.webutil.flask_util import cloud_tasks_only, error, flash
1✔
21
from oauth_dropins.webutil.util import json_dumps, json_loads
1✔
22
from oauth_dropins.webutil import webmention
1✔
23
from requests import HTTPError, RequestException
1✔
24
from requests.auth import HTTPBasicAuth
1✔
25
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException, NotFound
1✔
26

27
import common
1✔
28
from common import (
1✔
29
    CACHE_CONTROL,
30
    DOMAIN_RE,
31
    PRIMARY_DOMAIN,
32
    PROTOCOL_DOMAINS,
33
    SUPERDOMAIN,
34
)
35
from flask_app import app
1✔
36
from ids import normalize_user_id, translate_object_id, translate_user_id
1✔
37
from models import Follower, Object, PROTOCOLS, Target, User
1✔
38
from protocol import Protocol
1✔
39

40
logger = logging.getLogger(__name__)
1✔
41

42
# www. domains that Web.verify leaves as is instead of trying to switch them
# to their root domain. Background:
# https://github.com/snarfed/bridgy-fed/issues/314
WWW_DOMAINS = frozenset({'www.jvt.me'})

# Maps a feed's MIME type, without parameters, to a short feed kind.
FEED_TYPES = {
    atom.CONTENT_TYPE.partition(';')[0]: 'atom',
    rss.CONTENT_TYPE.partition(';')[0]: 'rss',
    # generic XML types; see
    # https://stackoverflow.com/questions/4832357/whats-the-difference-between-text-xml-vs-application-xml-for-webservice-respons
    'application/xml': 'xml',
    'text/xml': 'xml',
}
MIN_FEED_POLL_PERIOD = timedelta(hours=2)
MAX_FEED_POLL_PERIOD = timedelta(days=1)
MAX_FEED_PROPERTY_SIZE = 500_000  # Object.atom/rss
MAX_FEED_ITEMS_PER_POLL = 10
1✔
58

59

60
def is_valid_domain(domain, allow_internal=True):
    """Checks whether a domain is one that we can use.

    A domain passes if it matches ``DOMAIN_RE``, :meth:`Web.is_blocklisted`
    doesn't reject it, and its last dot-separated label is in
    ``brevity.TLDS``.

    Args:
      domain (str):
      allow_internal (bool): passed through to :meth:`Web.is_blocklisted`;
        whether internal domains like ``fed.brid.gy``, ``bsky.brid.gy``, etc
        should count as valid

    Returns:
      bool: True if the domain is usable, False otherwise
    """
    looks_like_domain = bool(domain) and re.match(DOMAIN_RE, domain)
    if not looks_like_domain:
        logger.debug(f"{domain} doesn't look like a domain")
        return False

    if Web.is_blocklisted(domain, allow_internal=allow_internal):
        logger.info(f'{domain} is blocklisted')
        return False

    *_, tld = domain.split('.')
    if tld in brevity.TLDS:
        return True

    logger.info(f"{domain} looks like a domain but {tld} isn't a TLD")
    return False
1✔
84

85

86
class Web(User, Protocol):
    """Web user and webmention protocol implementation.

    Entities are keyed by domain, ie the key name is the user's domain.
    """
    ABBREV = 'web'
    PHRASE = 'the web'
    OTHER_LABELS = ('webmention',)
    LOGO_HTML = '🌐'  # used to be 🕸️
    CONTENT_TYPE = common.CONTENT_TYPE_HTML
    DEFAULT_ENABLED_PROTOCOLS = ('activitypub',)
    SUPPORTED_AS1_TYPES = (
        *as1.ACTOR_TYPES,
        *as1.POST_TYPES,
        *as1.CRUD_VERBS,
        'audio', 'bookmark', 'event', 'image', 'video',
        'follow', 'like', 'share', 'stop-following',
    )

    # results of the checks in verify()
    has_redirects = ndb.BooleanProperty()
    redirects_error = ndb.TextProperty()
    has_hcard = ndb.BooleanProperty()

    # webmention and feed polling state
    last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc)
    last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc)
    feed_last_item = ndb.StringProperty()  # id (URL)
    feed_etag = ndb.StringProperty()
    feed_last_modified = ndb.StringProperty()

    # only used by protocol bot users in Bluesky, for polling their chat
    # messages with chat.bsky.convo.getLog
    atproto_last_chat_log_cursor = ndb.StringProperty()

    # Which subdomain this Web user's ActivityPub actor lives on.
    #
    # Originally, BF served Web users' AP actor ids on fed.brid.gy, eg
    # https://fed.brid.gy/snarfed.org . When we started adding new protocols,
    # we switched to per-protocol subdomains, eg
    # https://web.brid.gy/snarfed.org . The old users' actor ids still need to
    # be preserved as is, though.
    #
    # Our per-protocol bot accounts in ActivityPub are also on their own
    # subdomains, eg @bsky.brid.gy@bsky.brid.gy.
    ap_subdomain = ndb.StringProperty(
        choices=['ap', 'bsky', 'fed', 'web', 'fake', 'other', 'eefake'],
        default='web',
    )

    # OLD. some stored entities still have these; do not reuse.
    # superfeedr_subscribed = ndb.DateTimeProperty(tzinfo=timezone.utc)
    # superfeedr_subscribed_feed = ndb.StringProperty()
134

135
    @classmethod
    def _get_kind(cls):
        """Returns the fixed kind name ``MagicKey`` for this model."""
        kind = 'MagicKey'
        return kind
1✔
138

139
    def _pre_put_hook(self):
        """Checks the key id before this entity is stored.

        Runs the parent class's hook first, then asserts that the key id is a
        valid domain according to :func:`is_valid_domain` and that it has no
        upper case characters.
        """
        super()._pre_put_hook()

        domain = self.key.id()
        assert is_valid_domain(domain), domain
        assert domain == domain.lower(), \
            f'upper case is not allowed in Web key id: {domain}'
1✔
145

146
    @classmethod
    def get_or_create(cls, id, allow_opt_out=False, verify=None, **kwargs):
        """Normalizes the domain, then defers to :meth:`User.get_or_create`.

        Normalization happens in :meth:`key_for`. Domains on or under
        ``SUPERDOMAIN`` are only looked up, never created. Newly created users
        get a ``poll-feed`` task enqueued.

        Args:
          id (str): domain or home page URL
          allow_opt_out (bool): passed through to :meth:`key_for` and
            :meth:`User.get_or_create`
          verify (bool): whether to call :meth:`verify` to load h-card, check
            redirects, etc. Defaults to calling it only if the user is new.
          kwargs: passed through to :meth:`User.get_or_create`

        Returns:
          Web: the user, or None if :meth:`key_for` returned no key, eg
          because the user is opted out
        """
        key = cls.key_for(id, allow_opt_out=allow_opt_out)
        if not key:
            return None  # opted out

        domain = key.id()
        our_domain = SUPERDOMAIN.strip('.')
        if util.domain_or_parent_in(domain, [our_domain]):
            # our own domains: existing entities only, don't create
            return super().get_by_id(domain)

        user = super().get_or_create(domain, allow_opt_out=allow_opt_out, **kwargs)

        should_verify = verify if verify is not None else not user.existing
        if should_verify:
            # verify() may hand back a different user, eg the root domain's
            user = user.verify()

        if not user.existing:
            common.create_task(queue='poll-feed', domain=user.key.id())

        return user
1✔
174

175
    @ndb.ComputedProperty
    def handle(self):
        """Returns this user's chosen username or domain, eg ``user.com``.

        If :meth:`username` is just the key id, it's returned as is. Otherwise
        it's passed through ``util.domain_from_link`` with ``minimize=False``.
        """
        username = self.username()
        if username == self.key.id():
            return username

        return util.domain_from_link(username, minimize=False)
1✔
183

184
    def handle_as(self, to_proto):
        """Returns this user's handle in another protocol.

        ActivityPub is special cased: users with ``has_redirects`` set get
        ``@[username]@[domain]``, all others get
        ``@[domain]@[ap_subdomain][SUPERDOMAIN]``. Every other protocol is
        handled by the parent class.

        Args:
          to_proto (str or Protocol)

        Returns:
          str:
        """
        if to_proto not in ('activitypub', 'ap', PROTOCOLS['ap']):
            return super().handle_as(to_proto)

        if self.has_redirects:
            return f'@{self.username()}@{self.key.id()}'

        return f'@{self.key.id()}@{self.ap_subdomain}{SUPERDOMAIN}'
1✔
191

192
    def id_as(self, to_proto):
        """Returns this user's id in another protocol.

        For ActivityPub, the translated id's ``fed.brid.gy`` or
        ``web.brid.gy`` URL prefix is rewritten to use ``ap_subdomain``.

        Args:
          to_proto (str or Protocol): protocol label or class

        Returns:
          str:
        """
        proto = PROTOCOLS[to_proto] if isinstance(to_proto, str) else to_proto

        user_id = translate_user_id(id=self.key.id(), from_=self, to=proto)
        if proto.LABEL != 'activitypub':
            return user_id

        # swap the "other" default subdomain's prefix for this user's subdomain
        other = 'web' if self.ap_subdomain == 'fed' else 'fed'
        old_prefix = f'https://{other}.brid.gy/'
        new_prefix = f'https://{self.ap_subdomain}.brid.gy/'
        return user_id.replace(old_prefix, new_prefix)
1✔
206

207
    # a Web user's web URL is the same thing as their profile id
    web_url = User.profile_id

    def is_web_url(self, url):
        """Wraps :meth:`User.is_web_url`, always passing ``ignore_www=True``."""
        ignore_www = True
        return super().is_web_url(url, ignore_www=ignore_www)
1✔
211

212
    def user_page_path(self, rest=None):
        """Returns the path of this user's user page. Always uses the domain.

        Args:
          rest (str): optional additional path and/or query string to append.
            Joined with a ``/`` unless it starts with ``?``; leading slashes
            are dropped.

        Returns:
          str: ``/[ABBREV]/[domain]``, plus ``rest`` if provided
        """
        base = f'/{self.ABBREV}/{self.key.id()}'
        if not rest:
            return base

        separator = '' if rest.startswith('?') else '/'
        return base + separator + rest.lstrip('/')
1✔
222

223
    def username(self):
        """Returns the user's preferred username.

        For direct users with a stored profile object, looks through its
        ``url`` and ``urls`` fields for an ``acct:`` URI on this user's own
        domain and returns that URI's username. Falls back to the key id.

        Returns:
          str:
        """
        domain = self.key.id()

        if not (self.obj and self.obj.as1 and self.direct):
            return domain

        profile = self.obj.as1
        candidates = util.get_list(profile, 'url') + util.get_list(profile, 'urls')
        for candidate in candidates:
            url = candidate.get('value') if isinstance(candidate, dict) else candidate
            if not url or not url.startswith('acct:'):
                continue

            try:
                urluser, urldomain = util.parse_acct_uri(url)
            except ValueError:
                continue  # malformed acct: URI, try the next one

            if urldomain == domain:
                logger.info(f'Found custom username: {urluser}')
                return urluser

        # logger.debug(f'Defaulting username to key id {id}')
        return domain
1✔
248

249
    def verify(self):
        """Fetches site a couple ways to check for redirects and h-card.

        Steps:

        1. If the domain starts with ``www.`` and isn't in ``WWW_DOMAINS``,
           fetch the root domain. If that serves ok and resolves to this
           user's web URL, store the root domain's user in ``use_instead``
           and verify and return that user instead.
        2. Fetch this site's ``/.well-known/webfinger`` path and check that
           it ends up at the same path on one of our domains. Sets
           ``has_redirects`` and ``redirects_error``.
        3. Load the home page into ``obj`` and set ``has_hcard``.

        Stores this entity at the end.

        Returns:
          web.Web: user that was verified. May be different than self! eg if
          self's domain started with www and we switch to the root domain.
        """
        domain = self.key.id()
        logger.info(f'Verifying {domain}')

        if domain.startswith('www.') and domain not in WWW_DOMAINS:
            # if root domain serves ok, use it instead
            # https://github.com/snarfed/bridgy-fed/issues/314
            root = domain.removeprefix('www.')
            root_site = f'https://{root}/'
            try:
                resp = util.requests_get(root_site, gateway=False)
                if resp.ok and self.is_web_url(resp.url):
                    logger.info(f'{root_site} serves ok ; using {root} instead')
                    root_user = Web.get_or_create(
                        root,
                        enabled_protocols=self.enabled_protocols,
                        direct=self.direct)
                    self.use_instead = root_user.key
                    self.put()
                    return root_user.verify()
            except RequestException as e:
                logger.info(f"Couldn't fetch {root_site} : {e}")
                logger.info(f"Continuing with {domain}")

        # check webfinger redirect
        path = f'/.well-known/webfinger?resource=acct:{domain}@{domain}'
        self.has_redirects = False
        self.redirects_error = None
        try:
            url = urljoin(self.web_url(), path)
            resp = util.requests_get(url, gateway=False)
            our_bases = ([f'https://{ours}/' for ours in common.DOMAINS] +
                         [common.host_url()])
            expected = [urljoin(base, path) for base in our_bases]
            if resp.url:
                got = urllib.parse.unquote(resp.url)
                if got in expected:
                    self.has_redirects = True
                elif got:
                    diff = '\n'.join(difflib.Differ().compare([got], [expected[0]]))
                    self.redirects_error = f'Current vs expected:<pre>{diff}</pre>'
            else:
                # the response has no final URL, so there's no redirect
                # destination to report, just the status code
                lines = [url, f'  returned HTTP {resp.status_code}']
                self.redirects_error = '<pre>' + '\n'.join(lines) + '</pre>'
        except RequestException as e:
            # best effort: leave has_redirects False, but record why
            logger.info(f"Couldn't check webfinger redirect for {domain} : {e}")

        # check home page
        self.obj = None
        self.has_hcard = False
        try:
            self.obj = Web.load(self.web_url(), remote=True, gateway=True)
            if self.obj:
                self.has_hcard = True
        except (BadRequest, NotFound):
            # no usable h-card on the home page; leave obj unset
            pass

        self.put()
        return self
1✔
317

318
    @classmethod
    def key_for(cls, id, allow_opt_out=False):
        """Returns the :class:`ndb.Key` for a given id.

        The id is lower cased and stripped of leading and trailing dots. If
        it's then a URL with an empty or ``/`` path, ie a home page URL, it's
        reduced to its network location. If the result is a valid domain, its
        key is returned. Otherwise, returns None.

        Args:
          id (str)
          allow_opt_out (bool): whether to allow users who are currently opted out

        Returns:
          ndb.Key or None:
        """
        if not id:
            return None

        normalized = id.lower().strip('.')
        if util.is_web(normalized):
            parts = urlparse(normalized)
            if parts.path in ('', '/'):
                normalized = parts.netloc

        if not is_valid_domain(normalized, allow_internal=True):
            return None

        return super().key_for(normalized, allow_opt_out=allow_opt_out)
1✔
346

347
    @classmethod
    def owns_id(cls, id):
        """Returns whether this protocol owns the given id.

        All web pages are http(s) URLs, but not all http(s) URLs are web pages.

        Args:
          id (str)

        Returns:
          bool or None: True if the id maps to a stored user with
          ``has_redirects`` set; None if it's otherwise a valid domain or an
          http(s) URL on a valid, non-internal domain; False for anything
          else, including empty ids
        """
        if not id:
            return False

        key = cls.key_for(id)
        if key:
            user = key.get()
            if user and user.has_redirects:
                return True
            return None

        if is_valid_domain(id):
            return None

        # we allowed internal domains for protocol bot actors above, but we
        # don't want to allow non-homepage URLs on those domains, eg
        # https://bsky.brid.gy/foo, so don't allow internal here
        domain = util.domain_from_link(id)
        if util.is_web(id) and is_valid_domain(domain, allow_internal=False):
            return None

        return False
1✔
370

371
    @classmethod
    def owns_handle(cls, handle, allow_internal=False):
        """Returns whether this protocol owns the given handle.

        Args:
          handle (str)
          allow_internal (bool): passed through to :func:`is_valid_domain`

        Returns:
          bool or None: True if the handle is ``PRIMARY_DOMAIN`` or one of
          ``PROTOCOL_DOMAINS``, False if it isn't a valid domain, None
          otherwise
        """
        is_our_domain = handle == PRIMARY_DOMAIN or handle in PROTOCOL_DOMAINS
        if is_our_domain:
            return True

        if not is_valid_domain(handle, allow_internal=allow_internal):
            return False

        return None
1✔
377

378
    @classmethod
    def handle_to_id(cls, handle):
        """Returns the id for a handle. For Web, they're the same string.

        Asserts that :meth:`owns_handle` doesn't return False for the handle.
        """
        owned = cls.owns_handle(handle)
        assert owned is not False
        return handle
1✔
382

383
    @classmethod
    def target_for(cls, obj, shared=False):
        """Returns `obj`'s id, as a URL webmention target.

        Args:
          obj (models.Object)
          shared (bool): unused here

        Returns:
          str or None: the object's key id if it's a URL, otherwise None
        """
        # TODO: we have entities in prod that fail this, eg
        # https://indieweb.social/users/bismark has source_protocol webmention
        # assert obj.source_protocol in (cls.LABEL, cls.ABBREV, 'ui', None), str(obj)

        obj_id = obj.key.id()
        if util.is_web(obj_id):
            return obj_id

        logger.warning(f"{obj_id} is source_protocol web but id isn't a URL!")
        return None
1✔
395

396
    @classmethod
    def send(to_cls, obj, url, from_user=None, orig_obj=None, **kwargs):
        """Sends a webmention to a given target URL.

        See :meth:`Protocol.send` for details.

        Returns False without sending if ``url`` isn't one of the object's
        targets, if it's blocklisted, or if it doesn't advertise a webmention
        endpoint.
        https://fed.brid.gy/docs#error-handling

        Returns:
          bool: True if the webmention was sent, False otherwise
        """
        targets = as1.targets(obj.as1)

        is_target = url in targets
        if not is_target:
            # homepage, check domain too
            is_homepage = urlparse(url).path.strip('/') == ''
            is_target = is_homepage and util.domain_from_link(url) in targets

        if not is_target:
            logger.debug(f'Skipping sending to {url} , not a target in the object')
            return False

        if to_cls.is_blocklisted(url):
            logger.info(f'Skipping sending to blocklisted {url}')
            return False

        from_proto = PROTOCOLS[obj.source_protocol]
        source_id = translate_object_id(id=obj.key.id(), from_=from_proto, to=Web)
        source_url = quote(source_id, safe=':/%+')
        logger.info(f'Sending webmention from {source_url} to {url}')

        # we only send webmentions for responses. for sending normal posts etc
        # to followers, we just update our stored objects (elsewhere) and web
        # users consume them via feeds.
        endpoint = common.webmention_discover(url).endpoint
        if endpoint:
            webmention.send(endpoint, source_url, url)
            return True

        return False
1✔
432

433
    @classmethod
    def load(cls, id, **kwargs):
        """Wraps :meth:`Protocol.load`, converting bare domains to homepage URLs.

        Args:
          id (str): URL, or domain matching ``DOMAIN_RE``, which is loaded as
            ``https://[domain]/``
          kwargs: passed through to :meth:`Protocol.load`
        """
        url = f'https://{id}/' if re.match(DOMAIN_RE, id) else id
        return super().load(url, **kwargs)
1✔
440

441
    @classmethod
    def fetch(cls, obj, gateway=False, check_backlink=False,
              authorship_fetch_mf2=True, metaformats=None, **kwargs):
        """Fetches a URL over HTTP and extracts its microformats2.

        Follows redirects, but doesn't change the original URL in ``obj``'s id!
        :class:`google.cloud.ndb.model.Model` doesn't allow that anyway, but more
        importantly, we want to preserve that original URL because other objects
        may refer to it instead of the final redirect destination URL.

        Home pages of our own domains (``PRIMARY_DOMAIN`` and
        ``PROTOCOL_DOMAINS``) aren't fetched; their profile is read from the
        local ``[domain].as2.json`` file into ``obj.as2`` instead.

        See :meth:`Protocol.fetch` for other background.

        Args:
          obj (models.Object): its key id is the URL to fetch. On success, the
            extracted mf2 item is stored in ``obj.mf2``.
          gateway (bool): passed through to
            :func:`oauth_dropins.webutil.util.fetch_mf2`
          check_backlink (bool): optional, whether to require a link to Bridgy
            Fed. Ignored if the URL is a homepage, ie has no path.
          authorship_fetch_mf2 (bool): optional, when running the authorship
            algorithm, fetch author URL if necessary
          metaformats (bool): passed through to
            :func:`oauth_dropins.webutil.util.fetch_mf2`. Defaults to True for
            URLs with an empty or ``/`` path, False otherwise.
          kwargs: ignored

        Returns:
          bool: True if ``obj`` was populated, False if the id isn't a URL, the
          page has no microformats2, or there's no local profile for one of
          our own domains. Other failures are reported by calling ``error``.
        """
        url = obj.key.id()
        if not util.is_web(url):
            logger.info(f'{url} is not a URL')
            return False

        is_homepage = urlparse(url).path.strip('/') == ''
        if is_homepage:
            domain = util.domain_from_link(url)
            if domain == PRIMARY_DOMAIN or domain in PROTOCOL_DOMAINS:
                # one of our own domains; use the bundled profile
                profile = util.read(f'{domain}.as2.json')
                if profile:
                    obj.as2 = json_loads(profile)
                    return True
                return False

        require_backlink = (common.host_url().rstrip('/')
                            if check_backlink and not is_homepage
                            else None)
        if metaformats is None:
            # default to only for homepages
            metaformats = urlparse(url).path in ('', '/')

        try:
            parsed = util.fetch_mf2(url, gateway=gateway, metaformats=metaformats,
                                    require_backlink=require_backlink)
        except ValueError as e:
            error(str(e))

        if parsed is None:
            error(f'id {urlparse(url).fragment} not found in {url}')
        elif not parsed.get('items'):
            logger.info(f'No microformats2 found in {url}')
            return False

        # find mf2 item
        if is_homepage:
            logger.info(f"{url} is user's web url")
            entry = mf2util.representative_hcard(parsed, parsed['url'])
            if not entry:
                error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}")
            logger.info(f'Found representative h-card')
        else:
            entry = mf2util.find_first_entry(parsed, ['h-entry'])
            if not entry:
                error(f'No microformats2 h-entry found in {url}')

        # discard uid if set; we use URL as id
        props = entry.setdefault('properties', {})
        if 'uid' in props:
            logger.info(f'Discarding uid property: {props["uid"]}')
            props.pop('uid')

        # store final URL in mf2 object
        if is_homepage:
            entry.setdefault('rel-urls', {}).update(parsed.get('rel-urls', {}))
            entry.setdefault('type', ['h-card'])
        final_url = parsed.get('url')
        if final_url:
            entry['url'] = final_url
        logger.info(f'Extracted microformats2 entry: {json_dumps(entry, indent=2)}')

        if not is_homepage:
            # default actor/author to home page URL
            authors = props.setdefault('author', [])
            if not microformats2.get_string_urls(authors):
                homepage = urljoin(final_url or url, '/')
                logger.info(f'Defaulting author URL to {homepage}')
                if authors and isinstance(authors[0], dict):
                    # tolerate an embedded author that has no properties yet
                    authors[0].setdefault('properties', {})['url'] = [homepage]
                else:
                    authors.insert(0, homepage)

            # run full authorship algorithm if necessary:
            # https://indieweb.org/authorship
            # duplicated in microformats2.json_to_object
            author = util.get_first(props, 'author')
            if not isinstance(author, dict):
                logger.info(f'Fetching full authorship for author {author}')
                fetch_fn = util.fetch_mf2 if authorship_fetch_mf2 else None
                author = mf2util.find_author({'items': [entry]}, hentry=entry,
                                             fetch_mf2_func=fetch_fn)
                logger.info(f'Got: {author}')
                if author:
                    props['author'] = util.trim_nulls([{
                        "type": ["h-card"],
                        'properties': {
                            field: [author[field]] if author.get(field) else []
                            for field in ('name', 'photo', 'url')
                        },
                    }])

        obj.mf2 = entry
        return True
1✔
554

555
    @classmethod
    def _convert(cls, obj, from_user=None):
        """Converts a :class:`Object` to HTML.

        Fills in the ``author`` and ``actor`` fields from their source
        protocol when they only contain an id, renders the activity as
        microformats2 HTML, and adds an HTML meta refresh redirect to the
        source page when its URL is an http(s) URL.

        Args:
          obj (models.Object)
          from_user (models.User): user (actor) this activity/object is from

        Returns:
          str: HTML, or the empty string if ``obj`` is empty or has no AS1
        """
        # imported under its own name because ``html`` is a local variable below
        from html import escape

        if not obj or not obj.as1:
            return ''

        obj_as1 = obj.as1
        if from_user and not from_user.is_enabled(cls):
            error(f'{from_user.key.id()} => {cls.LABEL} not enabled')

        from_proto = PROTOCOLS.get(obj.source_protocol)
        if from_proto:
            # fill in author/actor if available
            for field in 'author', 'actor':
                val = as1.get_object(obj.as1, field)
                if val.keys() == {'id'} and val['id']:
                    loaded = from_proto.load(val['id'])
                    if loaded and loaded.as1:
                        obj_as1 = {**obj_as1, field: loaded.as1}
        else:
            logger.debug(f'Not hydrating actor or author due to source_protocol {obj.source_protocol}')

        html = microformats2.activities_to_html([cls.translate_ids(obj_as1)])

        # add HTML meta redirect to source page. should trigger for end users in
        # browsers but not for webmention receivers (hopefully).
        url = util.get_url(obj_as1) or obj_as1.get('id') or obj.key.id()
        if util.is_web(url):
            utf8 = '<meta charset="utf-8">'
            # the URL comes from object data, so escape it before embedding
            # it in the attribute value; otherwise a quote in the URL could
            # end the attribute and inject markup
            safe_url = escape(url, quote=True)
            refresh = f'<meta http-equiv="refresh" content="0;url={safe_url}">'
            html = html.replace(utf8, utf8 + '\n' + refresh)

        return html
1✔
596

597

598
@app.get('/web-site')
@flask_util.headers(CACHE_CONTROL)
def enter_web_site():
    """Renders the ``enter_web_site.html`` template."""
    template = 'enter_web_site.html'
    return render_template(template)
×
602

603

604
@app.post('/web-site')
def check_web_site():
    """Handles the web site form: validates the site and creates its user.

    Reads the ``url`` request parameter. Re-renders the form with a flashed
    message and HTTP 400 if it isn't a valid, supported, top-level web site,
    or if the user is opted out. Otherwise creates and verifies the
    :class:`Web` user, enabled for ``atproto``, and redirects to their user
    page.
    """
    logger.info(f'Params: {list(request.form.items())}')

    url = request.values['url']

    def reject(message):
        """Flashes ``message`` and re-renders the form with HTTP 400."""
        flash(message)
        return render_template('enter_web_site.html'), 400

    # this normalizes and lower cases domain
    try:
        domain = normalize_user_id(id=url, proto=Web)
    except (ValueError, AssertionError):
        logger.info(f'bad web id? {url}', exc_info=True)
        domain = None

    if not (domain and is_valid_domain(domain, allow_internal=False)):
        return reject(f'{url} is not a valid or supported web site')

    if util.is_web(url) and urlparse(url).path.strip('/'):
        return reject('Only top-level web sites and domains are supported.')

    try:
        user = Web.get_or_create(domain, enabled_protocols=['atproto'],
                                 propagate=True, direct=True, verify=True)
        if not user:  # opted out
            return reject(f'{url} is not a valid or supported web site')
    except BaseException as e:
        # only HTTP-ish failures are reported to the user; re-raise the rest
        code, _ = util.interpret_http_exception(e)
        if code:
            flash(f"Couldn't connect to {url}: {e}")
            return render_template('enter_web_site.html')
        raise

    user.put()
    return redirect(user.user_page_path())
1✔
640

641

642
@app.post('/webmention')
def webmention_external():
    """Handles an inbound webmention by enqueueing a task to process it.

    Delivery to followers happens in a task queue because we send to each
    inbox in serial, which can take a long time with many
    followers/instances.

    Requires the ``source`` parameter to be an https URL on the domain of an
    existing :class:`Web` user. That user's ``last_webmention_in`` is updated
    before the task is created.
    """
    logger.info(f'Params: {list(request.form.items())}')

    source = flask_util.get_required_param('source').strip()
    if Web.owns_id(source) is False:
        error(f'Bad URL {source}')
    elif urlparse(source).scheme != 'https':
        error('source URLs must be https (with SSL)')

    domain = util.domain_from_link(source, minimize=False)
    if not domain:
        error(f'Bad source URL {source}')

    user = Web.get_by_id(domain)
    if not user:
        error(f'No user found for domain {domain}')

    # record when we last received a webmention from this user's site
    user.last_webmention_in = util.now()
    user.put()

    form_params = request.form
    return common.create_task('webmention', **form_params)
1✔
669

670

671
@app.post('/queue/poll-feed')
@cloud_tasks_only
def poll_feed_task():
    """Fetches a :class:`Web` site's feed and delivers new/updated posts.

    Steps, in order:

    1. Load the :class:`Web` user. Give up if they have no profile mf2, or if
       they have ever sent us a webmention (``last_webmention_in`` is set).
    2. Discover the feed URL from the profile's mf2 ``rel-urls``.
    3. Fetch it, sending ``If-None-Match`` / ``If-Modified-Since`` when we
       have stored values for them.
    4. Parse it as Atom or RSS into AS1 activities.
    5. Create an :class:`Object` and a ``receive`` task for each activity, up
       to ``MAX_FEED_ITEMS_PER_POLL``.
    6. Enqueue the next ``poll-feed`` task, delayed by the mean gap between
       consecutive items' ``published`` times (or the time since the last
       poll), clamped to the configured min/max period.
    7. Store the poll time and the response's ``ETag`` / ``Last-Modified`` on
       the user.

    Params:
      ``domain`` (str): key id of the :class:`Web` user

    Returns:
      str: ``'OK'``, or a human-readable message when the user has no feed URL
      or the feed's content type is unknown. Other failures go through
      :func:`error`.
    """
    domain = flask_util.get_required_param('domain')
    logger.info(f'Polling feed for {domain}')

    user = Web.get_by_id(domain)
    if not (user and user.obj and user.obj.mf2):
        error(f'No Web user or object found for domain {domain}', status=304)
    elif user.last_webmention_in:
        logger.info('Dropping since last_webmention_in is set')
        return 'OK'

    # discover feed URL: first rel-url that is rel=alternate with a known feed
    # MIME type. feed_url and rel_type are left bound to that entry on break.
    for feed_url, info in user.obj.mf2.get('rel-urls', {}).items():
        rel_type = FEED_TYPES.get(info.get('type', '').split(';')[0])
        if 'alternate' in info.get('rels', []) and rel_type:
            break
    else:
        msg = f"User {user.key.id()} has no feed URL, can't fetch feed"
        logger.info(msg)
        return msg

    # fetch feed, conditionally if we have validators from the last poll
    headers = {}
    if user.feed_etag:
        headers['If-None-Match'] = user.feed_etag
    if user.feed_last_modified:
        headers['If-Modified-Since'] = user.feed_last_modified
    resp = util.requests_get(feed_url, headers=headers, gateway=True)

    content_type = resp.headers.get('Content-Type') or ''
    feed_type = FEED_TYPES.get(content_type.split(';')[0])
    if resp.status_code == 304:
        logger.info('Feed is unchanged since last poll')
        activities = []
        # never read when there are no activities, but keep it bound so the
        # loop below can't hit a NameError
        obj_feed_prop = {}
    # a generic XML content type falls back to the type declared in the
    # profile's rel-url
    elif feed_type == 'atom' or (feed_type == 'xml' and rel_type == 'atom'):
        try:
            activities = atom.atom_to_activities(resp.text)
        except (ValueError, ElementTree.ParseError) as e:
            # TODO: should probably still create the next poll-feed task
            error(f"Couldn't parse feed as Atom: {e}", status=502)
        obj_feed_prop = {'atom': resp.text[:MAX_FEED_PROPERTY_SIZE]}
    elif feed_type == 'rss' or (feed_type == 'xml' and rel_type == 'rss'):
        try:
            activities = rss.to_activities(resp.text)
        except ValueError as e:
            error(f"Couldn't parse feed as RSS: {e}", status=502)
        obj_feed_prop = {'rss': resp.text[:MAX_FEED_PROPERTY_SIZE]}
    else:
        msg = f'Unknown feed type {content_type}'
        logger.info(msg)
        return msg

    if len(activities) > MAX_FEED_ITEMS_PER_POLL:
        logger.info(f'Got {len(activities)} feed items, only processing the first {MAX_FEED_ITEMS_PER_POLL}')
        activities = activities[:MAX_FEED_ITEMS_PER_POLL]

    # create Objects and receive tasks
    for i, activity in enumerate(activities):
        # default actor and author to user
        activity.setdefault('actor', {}).setdefault('id', user.profile_id())
        obj = activity.setdefault('object', {})
        obj.setdefault('author', {}).setdefault('id', user.profile_id())

        # use URL as id since some feeds use non-URL (eg tag URI) ids
        for elem in obj, activity:
            if elem_url := elem.get('url'):
                elem['id'] = elem_url

        logger.debug(f'Converted to AS1: {json_dumps(activity, indent=2)}')

        post_id = Object(our_as1=activity).as1.get('id')
        if not post_id:
            logger.warning('No id or URL!')
            continue

        # NOTE(review): feed_last_item is overwritten with the newest item
        # before any later item is compared against it, so the value stored by
        # the *previous* poll is never consulted and the "already seen" branch
        # only fires on a duplicate of this poll's first item. Left as is
        # because skipping at the previous value could stop updated posts
        # being redelivered; confirm the intended behavior.
        if i == 0:
            logger.info(f'Setting feed_last_item to {post_id}')
            user.feed_last_item = post_id
        elif post_id == user.feed_last_item:
            logger.info(f'Already seen {post_id}, skipping rest of feed')
            break

        if Web.owns_id(post_id) is False:
            logger.warning(f'Skipping bad id {post_id}')
            continue

        if not obj.get('image'):
            # fetch and check the post itself
            logger.info(f'No image in {post_id} , trying metaformats')
            post = Web.load(post_id, metaformats=True, authorship_fetch_mf2=False)
            if post and post.as1:
                # don't treat the user's own profile picture as a post image
                profile_images = (as1.get_ids(user.obj.as1, 'image')
                                  if user.obj.as1 else [])
                obj['image'] = [img for img in as1.get_ids(post.as1, 'image')
                                if img not in profile_images]

        activity['feed_index'] = i
        stored = Object.get_or_create(id=post_id, authed_as=domain,
                                      our_as1=activity, status='new',
                                      source_protocol=Web.ABBREV,
                                      users=[user.key], **obj_feed_prop)
        common.create_task(queue='receive', obj=stored.key.urlsafe(),
                           authed_as=user.key.id())

    # determine posting frequency
    published_last = None
    published_deltas = []  # timedeltas between entry published times
    for activity in activities:
        published = activity['object'].get('published')
        if published and published_last:
            published_deltas.append(
                abs(util.parse_iso8601(published) -
                    util.parse_iso8601(published_last)))
        published_last = published

    # create next poll task
    def clamp(delay):
        """Bounds delay to [MIN_FEED_POLL_PERIOD, MAX_FEED_POLL_PERIOD]."""
        return max(min(delay, MAX_FEED_POLL_PERIOD), MIN_FEED_POLL_PERIOD)

    if published_deltas:
        # poll about as often as the site posts
        delay = clamp(timedelta(seconds=statistics.mean(
            t.total_seconds() for t in published_deltas)))
    else:
        # no usable published times: use the time since the last poll, or
        # since the user was created if we've never polled.
        # NOTE(review): created is stamped as UTC here, presumably because it
        # is stored naive - confirm.
        delay = clamp(util.now() - (user.last_polled_feed
                                    or user.created.replace(tzinfo=timezone.utc)))

    common.create_task(queue='poll-feed', domain=user.key.id(), delay=delay)

    # update user
    user.last_polled_feed = util.now()
    user.feed_etag = resp.headers.get('ETag')
    user.feed_last_modified = resp.headers.get('Last-Modified')
    user.put()

    return 'OK'
1✔
812

813

814
@app.post('/queue/webmention')
@cloud_tasks_only
def webmention_task():
    """Handles inbound webmention task.

    Loads the :class:`Web` user for the source URL's domain, fetches the
    source page, and passes the resulting :class:`Object` to
    :meth:`Web.receive`. If the fetch fails with HTTP 404 or 410 and we have a
    completed ``#bridgy-fed-create`` object for the source, the webmention is
    interpreted as a delete of that post.

    For ``h-entry`` posts, the first author is forced to be the user: it is
    added if there is no author URL, and overridden if the first author URL
    isn't one of the user's web URLs.

    Params:
      ``source`` (str): URL

    Returns:
      the return value of :meth:`Web.receive`. Failures go through
      :func:`error`, mostly with status 304, or 502 when the source fetch
      fails with something other than 404/410.
    """
    logger.info(f'Params: {list(request.form.items())}')

    # load user
    source = flask_util.get_required_param('source').strip()
    domain = util.domain_from_link(source, minimize=False)
    logger.info(f'webmention from {domain}')

    user = Web.get_by_id(domain)
    if not user:
        error(f'No user found for domain {domain}', status=304)
    logger.info(f'User: {user.key.id()}')

    # fetch source page
    try:
        # remote=True to force fetch, local=True to populate new/changed attrs
        obj = Web.load(source, local=True, remote=True,
                       check_backlink=not appengine_info.LOCAL_SERVER)
    except BadRequest as e:
        error(str(e.description), status=304)
    except HTTPError as e:
        # requests.Response is falsy for every 4xx/5xx status, so compare
        # against None explicitly. A truthiness test would always drop the
        # response body from the message below.
        resp = e.response
        if resp is None or resp.status_code not in (410, 404):
            error(f'{e} ; {resp.text if resp is not None else ""}', status=502)

        # 404/410: the post is gone. Only emit a delete if we previously
        # published a create for it.
        create_id = f'{source}#bridgy-fed-create'
        logger.info(f'Interpreting as Delete. Looking for {create_id}')
        create = Object.get_by_id(create_id)
        if not create or create.status != 'complete':
            error(f"Bridgy Fed hasn't successfully published {source}", status=304)

        delete_id = f'{source}#bridgy-fed-delete'
        obj = Object(id=delete_id, status='new', our_as1={
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': user.web_url(),
            'object': source,
        })

    if not obj or (not obj.mf2 and obj.type != 'delete'):
        error(f"Couldn't load {source} as microformats2 HTML", status=304)
    elif obj.mf2 and 'h-entry' in obj.mf2.get('type', []):
        # make sure the post's first author is this user
        authors = obj.mf2['properties'].setdefault('author', [])
        author_urls = microformats2.get_string_urls(authors)
        if not author_urls:
            authors.append(user.web_url())
        elif not user.is_web_url(author_urls[0]):
            logger.info(f'Overriding author {author_urls[0]} with {user.web_url()}')
            if isinstance(authors[0], dict):
                # author is an embedded mf2 object; replace just its URL
                authors[0]['properties']['url'] = [user.web_url()]
            else:
                authors[0] = user.web_url()

    try:
        return Web.receive(obj, authed_as=user.key.id())
    except ValueError as e:
        logger.warning(e, exc_info=True)
        error(e, status=304)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc