
hasgeek / coaster · build 9244418196 (push, via GitHub)

26 May 2024 03:39PM UTC · coverage: 84.467% (-4.8%) from 89.263%

Add async support for Quart+Flask (#470)

This commit bumps the version number from 0.7 to 0.8, as it carries extensive changes:

* Ruff replaces Black, isort and flake8 for linting and formatting
* All decorators now support async functions and provide async wrapper implementations (see the sketch after this list)
* Some obsolete modules have been removed
* Pagination from Flask-SQLAlchemy is now bundled, removing that dependency (though Flask-SQLAlchemy is still used in tests)
* A new `compat` module provides wrappers for both Quart and Flask and is used by all other modules
* Some tests now run under Quart. The vast majority of tests have not been upgraded, and there are no tests for the async decorators yet, so overall line coverage has dropped significantly. Comprehensive test coverage is still pending; for now, Funnel's tests serve as the extended test suite.
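
As an illustration of the decorator change above: a decorator can branch on `asyncio.iscoroutinefunction` to return a matching sync or async wrapper. This is a minimal sketch of the pattern, not Coaster's actual implementation; the decorator name is hypothetical:

    import asyncio
    from functools import wraps

    def log_calls(func):
        """Hypothetical decorator that supports both sync and async callables."""
        if asyncio.iscoroutinefunction(func):
            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                print(f'calling {func.__name__}')  # shared pre-call logic
                return await func(*args, **kwargs)
            return async_wrapper

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            print(f'calling {func.__name__}')  # shared pre-call logic
            return func(*args, **kwargs)
        return sync_wrapper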

648 of 1023 new or added lines in 29 files covered. (63.34%)

138 existing lines in 17 files now uncovered.

3948 of 4674 relevant lines covered (84.47%)

3.38 hits per line

Source file: /src/coaster/utils/text.py (93.86% of lines covered)
"""Text processing utilities."""

from __future__ import annotations

import re
import string
from functools import partial
from html import unescape
from typing import Optional, Union
from xml.etree.ElementTree import Element  # nosec B405

import html5lib
from bleach.linkifier import DEFAULT_CALLBACKS, LinkifyFilter
from bleach.sanitizer import Cleaner
from markupsafe import Markup

__all__ = [
    'VALID_TAGS',
    'LINKIFY_SKIP_TAGS',
    'LINKIFY_CALLBACKS',
    'compress_whitespace',
    'deobfuscate_email',
    'normalize_spaces',
    'normalize_spaces_multiline',
    'sanitize_html',
    'simplify_text',
    'text_blocks',
    'ulstrip',
    'unicode_extended_whitespace',
    'urstrip',
    'ustrip',
]


#: Unicode's list of whitespace characters is missing some that were previously
#: classified as whitespace but are now considered format characters. These are
#: invisible and usually arrive via copy-paste, so we include them here as characters
#: to be replaced with spaces and stripped from the ends of text.
unicode_format_whitespace = (
    '\x85'  # NEXT LINE (NEL)
    '\xa0'  # NO-BREAK SPACE (NBSP)
    '\u1680'  # OGHAM SPACE MARK
    '\u180e'  # MONGOLIAN VOWEL SEPARATOR
    '\u2000'  # EN QUAD
    '\u2001'  # EM QUAD
    '\u2002'  # EN SPACE
    '\u2003'  # EM SPACE
    '\u2004'  # THREE-PER-EM SPACE
    '\u2005'  # FOUR-PER-EM SPACE
    '\u2006'  # SIX-PER-EM SPACE
    '\u2007'  # FIGURE SPACE
    '\u2008'  # PUNCTUATION SPACE
    '\u2009'  # THIN SPACE
    '\u200a'  # HAIR SPACE
    '\u200b'  # ZERO WIDTH SPACE (format)
    '\u200c'  # ZERO WIDTH NON-JOINER (format)
    '\u200d'  # ZERO WIDTH JOINER (format)
    '\u2028'  # LINE SEPARATOR
    '\u2029'  # PARAGRAPH SEPARATOR
    '\u202f'  # NARROW NO-BREAK SPACE (NNBSP)
    '\u205f'  # MEDIUM MATHEMATICAL SPACE (MMSP)
    '\u2060'  # WORD JOINER (format)
    '\u3000'  # IDEOGRAPHIC SPACE
    '\ufeff'  # ZERO WIDTH NO-BREAK SPACE (format)
)

ascii_whitespace = '\t\n\x0b\x0c\r\x1c\x1d\x1e\x1f '
# ASCII whitespace minus every character that str.splitlines treats as a line break
ascii_whitespace_without_newline = '\t\x1f '
unicode_extended_whitespace = ascii_whitespace + unicode_format_whitespace

re_singleline_spaces = re.compile(
    '[' + unicode_extended_whitespace + ']', re.UNICODE | re.MULTILINE
)
re_multiline_spaces = re.compile(
    # This is missing \u2028 and \u2029 (separators)
    '[' + ascii_whitespace_without_newline + unicode_format_whitespace + ']',
    re.UNICODE | re.MULTILINE,
)
re_compress_spaces = re.compile(
    r'[\s' + unicode_format_whitespace + ']+', re.UNICODE | re.MULTILINE
)

VALID_TAGS: dict[str, list[str]] = {
    'a': ['href', 'title', 'target', 'rel'],
    'abbr': ['title'],
    'b': [],
    'br': [],
    'blockquote': [],
    'cite': [],
    'code': [],
    'dd': [],
    'del': [],
    'dl': [],
    'dt': [],
    'em': [],
    'h3': [],
    'h4': [],
    'h5': [],
    'h6': [],
    'hr': [],
    'i': [],
    'img': ['src', 'width', 'height', 'align', 'alt'],
    'ins': [],
    'li': [],
    'mark': [],
    'p': [],
    'pre': [],
    'ol': ['start'],
    'strong': [],
    'sup': [],
    'sub': [],
    'ul': [],
}

LINKIFY_SKIP_TAGS = ['pre', 'code', 'kbd', 'samp', 'var']

# Attrs is described in the Linkify source as {(namespace, name): value}, but the code
# that calls us sets it as `attrs = {(None, "href"): href, "_text": url}`
LinkifyAttrsType = Optional[dict[Union[tuple[Optional[str], str], str], str]]


# Adapted from https://bleach.readthedocs.io/en/latest/linkify.html#preventing-links
def dont_linkify_filenames(
    attrs: LinkifyAttrsType,
    new: bool = False,
) -> LinkifyAttrsType:
    # This is an existing link, so leave it be
    if not new:
        return attrs
    if attrs is None:
        return attrs
    # If the TLD is '.py', make sure it starts with http: or https:.
    # Use _text because that's the original text
    link_text = attrs['_text']
    if link_text.endswith('.py') and not link_text.startswith(('http:', 'https:')):
        # This looks like a Python file, not a URL. Don't make a link.
        return None
    # Everything checks out, keep going to the next callback.
    return attrs


LINKIFY_CALLBACKS = [*DEFAULT_CALLBACKS, dont_linkify_filenames]  # type: ignore[list-item]
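# Illustrative use of the callback above (a sketch, not part of this module;
# it assumes the `bleach` package is importable, as it is for this file):
#
#     import bleach
#     bleach.linkify('Check out setup.py', callbacks=LINKIFY_CALLBACKS)
#     # 'setup.py' is left unlinked: it ends in '.py' with no http(s) scheme
#     bleach.linkify('See https://example.com/app.py', callbacks=LINKIFY_CALLBACKS)
#     # linked as usual: the link text starts with 'https:'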

def sanitize_html(
    value: str,
    valid_tags: Optional[dict[str, list[str]]] = None,
    strip: bool = True,
    linkify: bool = False,
) -> Markup:
    """Strip unwanted markup out of HTML."""
    if valid_tags is None:
        valid_tags = VALID_TAGS
    if linkify:
        filters = [
            partial(
                LinkifyFilter, callbacks=LINKIFY_CALLBACKS, skip_tags=LINKIFY_SKIP_TAGS
            )
        ]
    else:
        filters = []
    cleaner = Cleaner(
        tags=list(valid_tags.keys()),
        attributes=valid_tags,
        filters=filters,
        strip=strip,
    )
    return Markup(cleaner.clean(value))
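# Expected behaviour, shown here as a sketch rather than a verified doctest:
#
#     sanitize_html('<p onclick="alert(1)">Hello <em>world</em></p>')
#     # -> Markup('<p>Hello <em>world</em></p>'): 'p' and 'em' are in
#     # VALID_TAGS, but 'onclick' is not a whitelisted attribute and is dropped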

blockish_tags = {
    'address',
    'article',
    'aside',
    'audio',
    'blockquote',
    'canvas',
    'dd',
    'div',
    'dl',
    'dt',
    'fieldset',
    'figcaption',
    'figure',
    'footer',
    'form',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'header',
    'hgroup',
    'hr',
    'li',
    'noscript',
    'ol',
    'output',
    'p',
    'pre',
    'section',
    'table',
    'td',
    'tfoot',
    'th',
    'tr',
    'ul',
    'video',
}

def text_blocks(html_text: str, skip_pre: bool = True) -> list[str]:
    """Extract a list of paragraphs from a given HTML string."""
    doc = html5lib.parseFragment(html_text)
    blocks = []

    def subloop(
        parent_tag: Optional[str], element: Element, lastchild: bool = False
    ) -> None:
        if callable(
            element.tag
        ):  # Comments have a callable tag. TODO: find out if anything else does
            tag = '<!-->'
            text = ''
            tail = element.tail or ''
        else:
            # Extract tag from namespace: "{http://www.w3.org/1999/xhtml}html"
            tag = element.tag.split('}')[-1]
            text = element.text or ''
            tail = element.tail or ''

        if tag == 'pre' and skip_pre:
            text = ''

        if tag in blockish_tags or tag == 'DOCUMENT_FRAGMENT':
            text = text.lstrip()  # Leading whitespace is insignificant in a block tag
            if len(element) == 0:
                # No children? Then trailing whitespace is insignificant
                text = text.rstrip()
            # If there's text, add it.
            # If there's no text but the next element is not a block tag, add a blank
            # anyway (unless it's a pre tag and we want to skip_pre, in which case
            # ignore it again).
            if text:
                blocks.append(text)
            elif (
                len(element)
                and isinstance(element[0].tag, str)
                and element[0].tag.split('}')[-1] not in blockish_tags
                and not (skip_pre and tag == 'pre')
            ):
                blocks.append('')
        elif not blocks:
            if text:
                blocks.append(text)
        else:
            blocks[-1] += text

        if len(element) > 0 and not (skip_pre and tag == 'pre'):
            for child in element[:-1]:
                subloop(tag, child)
            subloop(tag, element[-1], lastchild=True)

        if tag in blockish_tags:
            # Leading whitespace is insignificant after a block tag
            tail = tail.lstrip()
            if tail:
                blocks.append(tail)
        else:
            if parent_tag in blockish_tags and lastchild:
                # Trailing whitespace is insignificant before a block tag end
                tail = tail.rstrip()
            if not blocks:
                if tail:
                    blocks.append(tail)
            elif tag == 'br' and tail:
                blocks[-1] += '\n' + tail
            else:
                blocks[-1] += tail

    subloop(None, doc)
    # Replace &nbsp; with ' '
    return [t.replace('\xa0', ' ') for t in blocks]
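# Expected behaviour, shown here as a sketch rather than a verified doctest:
#
#     text_blocks('<p>First para</p><p>Second <em>para</em></p>')
#     # -> ['First para', 'Second para']
#     text_blocks('<pre>code</pre><p>text</p>')
#     # -> ['text']: contents of <pre> are skipped when skip_pre=True (default)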

def normalize_spaces(text: str) -> str:
    """Replace whitespace characters with regular spaces."""
    return re_singleline_spaces.sub(' ', text)


def normalize_spaces_multiline(text: str) -> str:
    """
    Replace whitespace characters with regular spaces, in multiline text.

    Line break characters like newlines are not considered whitespace.
    """
    return re_multiline_spaces.sub(' ', text)


def ulstrip(text: str) -> str:
    """Strip Unicode extended whitespace from the left side of a string."""
    return text.lstrip(unicode_extended_whitespace)


def urstrip(text: str) -> str:
    """Strip Unicode extended whitespace from the right side of a string."""
    return text.rstrip(unicode_extended_whitespace)


def ustrip(text: str) -> str:
    """Strip Unicode extended whitespace from a string."""
    return text.strip(unicode_extended_whitespace)


def compress_whitespace(text: str) -> str:
    """Reduce all space-like characters into single spaces and strip from ends."""
    return ustrip(re_compress_spaces.sub(' ', text))
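# Expected behaviour, shown here as a sketch rather than a verified doctest:
#
#     compress_whitespace('  Hello\u00a0\u200b  world\n')
#     # -> 'Hello world': NBSP, zero-width space and whitespace runs collapse
#     # to single spaces, and both ends are stripped
#     normalize_spaces_multiline('a\u00a0b\nc')
#     # -> 'a b\nc': the NBSP becomes a space but the newline is preserved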

# Based on http://jasonpriem.org/obfuscation-decoder/
_deobfuscate_dot1_re = re.compile(r'\W+\.\W+|\W+dot\W+|\W+d0t\W+', re.U | re.I)
_deobfuscate_dot2_re = re.compile(r'([a-z0-9])DOT([a-z0-9])')
_deobfuscate_dot3_re = re.compile(r'([A-Z0-9])dot([A-Z0-9])')
_deobfuscate_at1_re = re.compile(r'\W*@\W*|\W+at\W+', re.U | re.I)
_deobfuscate_at2_re = re.compile(r'([a-z0-9])AT([a-z0-9])')
_deobfuscate_at3_re = re.compile(r'([A-Z0-9])at([A-Z0-9])')


def deobfuscate_email(text: str) -> str:
    """Deobfuscate email addresses in provided text."""
    text = unescape(text)
    # Find the "dot"
    text = _deobfuscate_dot1_re.sub('.', text)
    text = _deobfuscate_dot2_re.sub(r'\1.\2', text)
    text = _deobfuscate_dot3_re.sub(r'\1.\2', text)
    # Find the "at"
    text = _deobfuscate_at1_re.sub('@', text)
    text = _deobfuscate_at2_re.sub(r'\1@\2', text)
    return _deobfuscate_at3_re.sub(r'\1@\2', text)
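# Expected behaviour, shown here as a sketch rather than a verified doctest:
#
#     deobfuscate_email('user at example dot com')
#     # -> 'user@example.com'
#     deobfuscate_email('userATexampleDOTcom')
#     # -> 'user@example.com'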

def simplify_text(text: str) -> str:
    """
    Simplify text to allow comparison.

    >>> simplify_text("Awesome Coder wanted at Awesome Company")
    'awesome coder wanted at awesome company'
    >>> simplify_text("Awesome Coder, wanted  at Awesome Company! ")
    'awesome coder wanted at awesome company'
    >>> simplify_text("Awesome Coder, wanted  at Awesome Company! ") == (
    ...     'awesome coder wanted at awesome company'
    ... )
    True
    """
    text = text.translate(text.maketrans('', '', string.punctuation)).lower()
    return ' '.join(text.split())