• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

alandtse / auth_capture_proxy / 21794459388

08 Feb 2026 07:36AM UTC coverage: 65.436% (+14.7%) from 50.687%
21794459388

push

github

web-flow
build: fix tox failures (#40)

16 of 23 new or added lines in 4 files covered. (69.57%)

155 existing lines in 1 file now uncovered.

638 of 975 relevant lines covered (65.44%)

0.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.96
/authcaptureproxy/examples/modifiers.py
1
#  SPDX-License-Identifier: Apache-2.0
2
"""
3
Python Package for auth capture proxy.
4

5
Example modifiers.
6
"""
7
import logging
1✔
8
import re
1✔
9
from functools import partial
1✔
10
from typing import Callable, Dict, List, Optional, Text
1✔
11

12
from bs4 import BeautifulSoup  # type: ignore[import]
1✔
13
from yarl import URL
1✔
14

15
from authcaptureproxy.helper import prepend_url, replace_empty_url, run_func, swap_url
1✔
16

17
_LOGGER = logging.getLogger(__name__)
1✔
18

19

20
def autofill(items: dict, html: Text) -> Text:
    """Autofill input tags in form.

    WARNING: This modifier does not obfuscate debug logs.

    Args:
        items (dict): Dictionary of values to fill. The key the name or id of the form input to fill and the value is the value.
        html (Text): html to convert

    Returns:
        Text: html with values filled in

    """
    parsed: BeautifulSoup = BeautifulSoup(html, "html.parser")
    if not html or not parsed:
        _LOGGER.debug("Soup is empty")
        return ""
    if not items:
        _LOGGER.debug("No items specified; no modifications made")
        return html
    for key, fill_value in items.items():
        # Match form inputs by either their name or their id attribute.
        candidates = parsed.find_all(attrs={"name": key})
        candidates += parsed.find_all(attrs={"id": key})
        for input_tag in candidates:
            # Only fill inputs that do not already carry a value.
            if input_tag.get("value"):
                continue
            input_tag["value"] = fill_value
            _LOGGER.debug("Filled %s", str(input_tag))
    return str(parsed)
46

47

48
async def replace_matching_urls(old_url: URL, new_url: URL, html: Text) -> Text:
    """Replace urls or parts of a url.

    Args:
        old_url (URL): Old url to find and replace. If there is any additional path, it will be added to the new_url.
        new_url (URL): New url to replace.
        html (Text): Text to replace

    Returns:
        Text: Replaced text
    """
    if not (old_url and new_url):
        _LOGGER.debug("No old_url or new_url specified; not modifying")
        return html
    # Bind the url swap parameters up front, then delegate the actual
    # html traversal to find_urls_bs4.
    swapper = partial(
        swap_url,
        ignore_query=True,
        old_url=old_url,
        new_url=new_url,
    )
    return await find_urls_bs4(swapper, search={}, exceptions={}, html=html)
74

75

76
async def replace_empty_action_urls(new_url: URL, html: Text) -> Text:
    """Replace urls of empty action attributes.

    For example, <form id="form" method="post" novalidate action="">

    Args:
        new_url (URL): New url to replace.
        html (Text): Text to replace

    Returns:
        Text: Replaced text
    """
    if not new_url:
        _LOGGER.debug("No new_url specified; not modifying")
        return html
    # Only form action attributes are searched; empty ones get new_url.
    filler = partial(replace_empty_url, new_url=new_url)
    return await find_urls_bs4(
        filler,
        search={"form": "action"},
        exceptions={},
        html=html,
    )
101

102

103
async def prepend_relative_urls(base_url: URL, html: Text) -> Text:
    """Prepend relative urls with url host.

    This is intended to be used for to place the proxy_url in front of relative urls in src="/

    Args:
        base_url (URL): Base URL to prepend
        html (Text): Text to replace

    Returns:
        Text: Replaced text
    """
    if not base_url:
        _LOGGER.debug("No base_url specified")
        return html
    # prepend_url takes the base as its first positional argument; the
    # remaining arguments are supplied per-url by find_urls_bs4.
    prepender = partial(prepend_url, base_url)
    return await find_urls_bs4(prepender, search={}, exceptions={}, html=html)
119

120

121
async def find_regex_urls(
    modifier: Optional[Callable] = None,
    patterns: Optional[Dict[Text, Text]] = None,
    html: Text = "",
) -> Text:
    """Find urls for based on regex.

    Seen in Tesla login with MFA enabled.

    Args:
        modifier (Optional[Callable], optional): The function to call. It will take in the html_tag, tag, and attribute and modify the html_tag. Defaults to None.
        patterns ([Dict[Text,Text]): A dictionary of regex patterns to search. Key is name and value is regex string.
        html (Text, optional): Text to replace. Defaults to "".

    Returns:
        Text: Text after applying the modifier to the urls found using the search.
    """
    if not html:
        _LOGGER.debug("html is empty")
        return ""
    if not modifier:
        _LOGGER.debug("No modifier provided; returning unmodified")
        return html
    if not patterns:
        _LOGGER.debug("No patterns provided; returning unmodified")
        return html
    for name, regex in patterns.items():
        found_urls = re.findall(regex, html, re.IGNORECASE)
        _LOGGER.debug("Found %s patterns for %s", len(found_urls), name)
        for url_text in found_urls:
            # The modifier may be sync or async; run_func handles both.
            new_url: URL = await run_func(modifier, name="", url=URL(url_text))
            _LOGGER.debug("Replacing %s -> %s", url_text, str(new_url))
            # Escape the matched text so it is treated literally, then
            # replace every occurrence (count=0 means unlimited).
            html = re.sub(re.escape(url_text), str(new_url), html, count=0, flags=re.IGNORECASE)
    return html
156

157

158
async def find_urls_bs4(
    modifier: Optional[Callable] = None,
    search: Optional[Dict[Text, Text]] = None,
    exceptions: Optional[Dict[Text, List[Text]]] = None,
    html: Text = "",
) -> Text:
    """Find urls in html using bs4.

    This function will search using beautifulsoup.find_all() and then apply the modifier function to the found url.

    The special search key "style=True" is a sentinel meaning "any tag with an
    inline style attribute"; its urls are extracted from css background-image
    declarations.

    Args:
        modifier (Optional[Callable], optional): The function to call. It will take in the html_tag, tag, and attribute and modify the html_tag. Defaults to None.
        search (Dict[Text, Text], optional): Search dictionary where keys is a tag and the value is an attribute. Defaults to {}.
        exceptions (Dict[Text, List[Text]], optional): Exceptions dictionary where keys is a tag and the value is a url to not modify. Defaults to {}.
        html (Text, optional): Text to replace. Defaults to "".

    Returns:
        Text: Text after applying the modifier to the urls found using the search.
    """
    search = search or {}
    exceptions = exceptions or {}
    soup: BeautifulSoup = BeautifulSoup(html, "html.parser")
    if not html:
        _LOGGER.debug("Soup is empty")
        return ""
    if not modifier:
        _LOGGER.debug("No modifier provided; returning unmodified")
        return html
    # Recurse into html templates embedded in <script type="text/html"> so
    # their urls are modified too.
    for nested_html in soup.find_all("script", type="text/html"):
        if nested_html.contents:
            _LOGGER.debug(
                "Found %s nested html content, searching nested content", len(nested_html.contents)
            )
        for content in nested_html.contents:
            content.replace_with(await find_urls_bs4(modifier, search, exceptions, str(content)))
    search = search or {
        "script": "src",
        "link": "href",
        "form": "action",
        "a": "href",
        "style=True": "style",
        "img": "src",
    }
    exceptions = exceptions or {"script": ["void(0)"], "form": ["get"], "a": ["javascript:void(0)"]}
    for tag, attribute in search.items():
        # BUG FIX: the "style=True" sentinel previously went through
        # soup.find_all(tag), which searched for a tag literally named
        # "style=True" and therefore never matched. Pass style=True as an
        # attribute filter instead to find tags with an inline style.
        if tag == "style=True":
            found_tags = soup.find_all(style=True)
        else:
            found_tags = soup.find_all(tag)
        for html_tag in found_tags:
            if tag == "style=True":
                # handle inline css background image urls
                # https://developer.mozilla.org/en-US/docs/Web/CSS/background-image
                # this currently only handles background-image as the first attribute
                # TODO: Rewrite regex to handle general case
                # NOTE: html_tag.get("style") returns only the attribute value
                # (e.g. background-image:url('...')) without a style=" prefix,
                # so the lookbehind must not expect one.
                pattern = r"(?<=background-image:url\([\"']).*(?=[\"']\))"
                attribute_value = html_tag.get(attribute)
                if attribute_value is None:
                    continue
                match = re.search(pattern, attribute_value)
                # BUG FIX: previously URL(str(re.search(...))) stringified the
                # Match object (or "None") instead of the matched url text.
                if not match:
                    continue
                url: Optional[URL] = URL(match.group(0))
                # Compare by string form, consistent with the generic branch.
                if str(url) not in exceptions.get(tag, []):
                    new_value = re.sub(
                        pattern,
                        # re.sub requires a str replacement; the modifier may
                        # return a URL, so convert explicitly.
                        str(await run_func(modifier, name="", url=url)),
                        attribute_value,
                    )
                    old_value = html_tag[attribute]
                    html_tag[attribute] = new_value
                    if str(old_value) != str(html_tag[attribute]):
                        _LOGGER.debug(
                            "Modified url for style:background-image %s -> %s",
                            url,
                            html_tag[attribute],
                        )
            else:
                url = URL(html_tag.get(attribute)) if html_tag.get(attribute) is not None else None
                if (
                    url is not None
                    # data: uris embed content, not addresses; leave them alone.
                    and not str(url).startswith("data:")
                    and str(url) not in exceptions.get(tag, [])
                ):
                    old_value = html_tag[attribute]
                    html_tag[attribute] = await run_func(modifier, name="", url=url)
                    if str(old_value) != str(html_tag[attribute]):
                        _LOGGER.debug(
                            "Modified url for %s:%s %s -> %s",
                            tag,
                            attribute,
                            url,
                            html_tag[attribute],
                        )
    return str(soup)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc