diff options
Diffstat (limited to 'examples/complex/xss_scanner.py')
-rwxr-xr-x | examples/complex/xss_scanner.py | 87 |
1 file changed, 46 insertions, 41 deletions
diff --git a/examples/complex/xss_scanner.py b/examples/complex/xss_scanner.py index 4b35c6c1..0ee38cd4 100755 --- a/examples/complex/xss_scanner.py +++ b/examples/complex/xss_scanner.py @@ -35,14 +35,17 @@ Line: 1029zxcs'd"ao<ac>so[sb]po(pc)se;sl/bsl\eq=3847asd """ -from mitmproxy import ctx +from html.parser import HTMLParser +from typing import Dict, Union, Tuple, Optional, List, NamedTuple from socket import gaierror, gethostbyname from urllib.parse import urlparse -import requests import re -from html.parser import HTMLParser + +import requests + from mitmproxy import http -from typing import Dict, Union, Tuple, Optional, List, NamedTuple +from mitmproxy import ctx + # The actual payload is put between a frontWall and a backWall to make it easy # to locate the payload with regular expressions @@ -83,15 +86,16 @@ def get_cookies(flow: http.HTTPFlow) -> Cookies: return {name: value for name, value in flow.request.cookies.fields} -def find_unclaimed_URLs(body: Union[str, bytes], requestUrl: bytes) -> None: +def find_unclaimed_URLs(body: str, requestUrl: bytes) -> None: """ Look for unclaimed URLs in script tags and log them if found""" - def getValue(attrs: List[Tuple[str, str]], attrName: str) -> str: + def getValue(attrs: List[Tuple[str, str]], attrName: str) -> Optional[str]: for name, value in attrs: if attrName == name: return value + return None class ScriptURLExtractor(HTMLParser): - script_URLs = [] + script_URLs = [] # type: List[str] def handle_starttag(self, tag, attrs): if (tag == "script" or tag == "iframe") and "src" in [name for name, value in attrs]: @@ -100,13 +104,10 @@ def find_unclaimed_URLs(body: Union[str, bytes], requestUrl: bytes) -> None: self.script_URLs.append(getValue(attrs, "href")) parser = ScriptURLExtractor() - try: - parser.feed(body) - except TypeError: - parser.feed(body.decode('utf-8')) + parser.feed(body) for url in parser.script_URLs: - parser = urlparse(url) - domain = parser.netloc + url_parser = urlparse(url) + domain = 
url_parser.netloc try: gethostbyname(domain) except gaierror: @@ -178,10 +179,11 @@ def log_SQLi_data(sqli_info: Optional[SQLiData]) -> None: if not sqli_info: return ctx.log.error("===== SQLi Found =====") - ctx.log.error("SQLi URL: %s" % sqli_info.url.decode('utf-8')) - ctx.log.error("Injection Point: %s" % sqli_info.injection_point.decode('utf-8')) - ctx.log.error("Regex used: %s" % sqli_info.regex.decode('utf-8')) - ctx.log.error("Suspected DBMS: %s" % sqli_info.dbms.decode('utf-8')) + ctx.log.error("SQLi URL: %s" % sqli_info.url) + ctx.log.error("Injection Point: %s" % sqli_info.injection_point) + ctx.log.error("Regex used: %s" % sqli_info.regex) + ctx.log.error("Suspected DBMS: %s" % sqli_info.dbms) + return def get_SQLi_data(new_body: str, original_body: str, request_URL: str, injection_point: str) -> Optional[SQLiData]: @@ -202,20 +204,21 @@ def get_SQLi_data(new_body: str, original_body: str, request_URL: str, injection "Sybase": (r"(?i)Warning.*sybase.*", r"Sybase message", r"Sybase.*Server message.*"), } for dbms, regexes in DBMS_ERRORS.items(): - for regex in regexes: + for regex in regexes: # type: ignore if re.search(regex, new_body, re.IGNORECASE) and not re.search(regex, original_body, re.IGNORECASE): return SQLiData(request_URL, injection_point, regex, dbms) + return None # A qc is either ' or " -def inside_quote(qc: str, substring: bytes, text_index: int, body: bytes) -> bool: +def inside_quote(qc: str, substring_bytes: bytes, text_index: int, body_bytes: bytes) -> bool: """ Whether the Numberth occurence of the first string in the second string is inside quotes as defined by the supplied QuoteChar """ - substring = substring.decode('utf-8') - body = body.decode('utf-8') + substring = substring_bytes.decode('utf-8') + body = body_bytes.decode('utf-8') num_substrings_found = 0 in_quote = False for index, char in enumerate(body): @@ -238,20 +241,20 @@ def inside_quote(qc: str, substring: bytes, text_index: int, body: bytes) -> boo return False -def 
paths_to_text(html: str, str: str) -> List[str]: +def paths_to_text(html: str, string: str) -> List[str]: """ Return list of Paths to a given str in the given HTML tree - Note that it does a BFS """ - def remove_last_occurence_of_sub_string(str: str, substr: str): + def remove_last_occurence_of_sub_string(string: str, substr: str) -> str: """ Delete the last occurence of substr from str String String -> String """ - index = str.rfind(substr) - return str[:index] + str[index + len(substr):] + index = string.rfind(substr) + return string[:index] + string[index + len(substr):] class PathHTMLParser(HTMLParser): currentPath = "" - paths = [] + paths = [] # type: List[str] def handle_starttag(self, tag, attrs): self.currentPath += ("/" + tag) @@ -260,7 +263,7 @@ def paths_to_text(html: str, str: str) -> List[str]: self.currentPath = remove_last_occurence_of_sub_string(self.currentPath, "/" + tag) def handle_data(self, data): - if str in data: + if string in data: self.paths.append(self.currentPath) parser = PathHTMLParser() @@ -268,7 +271,7 @@ def paths_to_text(html: str, str: str) -> List[str]: return parser.paths -def get_XSS_data(body: str, request_URL: str, injection_point: str) -> Optional[XSSData]: +def get_XSS_data(body: Union[str, bytes], request_URL: str, injection_point: str) -> Optional[XSSData]: """ Return a XSSDict if there is a XSS otherwise return None """ def in_script(text, index, body) -> bool: """ Whether the Numberth occurence of the first string in the second @@ -314,9 +317,9 @@ def get_XSS_data(body: str, request_URL: str, injection_point: str) -> Optional[ matches = regex.findall(body) for index, match in enumerate(matches): # Where the string is injected into the HTML - in_script = in_script(match, index, body) - in_HTML = in_HTML(match, index, body) - in_tag = not in_script and not in_HTML + in_script_val = in_script(match, index, body) + in_HTML_val = in_HTML(match, index, body) + in_tag = not in_script_val and not in_HTML_val in_single_quotes = 
inside_quote("'", match, index, body) in_double_quotes = inside_quote('"', match, index, body) # Whether you can inject: @@ -327,17 +330,17 @@ def get_XSS_data(body: str, request_URL: str, injection_point: str) -> Optional[ inject_slash = b"sl/bsl" in match # forward slashes inject_semi = b"se;sl" in match # semicolons inject_equals = b"eq=" in match # equals sign - if in_script and inject_slash and inject_open_angle and inject_close_angle: # e.g. <script>PAYLOAD</script> + if in_script_val and inject_slash and inject_open_angle and inject_close_angle: # e.g. <script>PAYLOAD</script> return XSSData(request_URL, injection_point, '</script><script>alert(0)</script><script>', match.decode('utf-8')) - elif in_script and in_single_quotes and inject_single_quotes and inject_semi: # e.g. <script>t='PAYLOAD';</script> + elif in_script_val and in_single_quotes and inject_single_quotes and inject_semi: # e.g. <script>t='PAYLOAD';</script> return XSSData(request_URL, injection_point, "';alert(0);g='", match.decode('utf-8')) - elif in_script and in_double_quotes and inject_double_quotes and inject_semi: # e.g. <script>t="PAYLOAD";</script> + elif in_script_val and in_double_quotes and inject_double_quotes and inject_semi: # e.g. <script>t="PAYLOAD";</script> return XSSData(request_URL, injection_point, '";alert(0);g="', @@ -380,33 +383,35 @@ def get_XSS_data(body: str, request_URL: str, injection_point: str) -> Optional[ injection_point, " onmouseover=alert(0) t=", match.decode('utf-8')) - elif in_HTML and not in_script and inject_open_angle and inject_close_angle and inject_slash: # e.g. <html>PAYLOAD</html> + elif in_HTML_val and not in_script_val and inject_open_angle and inject_close_angle and inject_slash: # e.g. 
<html>PAYLOAD</html> return XSSData(request_URL, injection_point, '<script>alert(0)</script>', match.decode('utf-8')) else: return None + return None # response is mitmproxy's entry point def response(flow: http.HTTPFlow) -> None: - cookiesDict = get_cookies(flow) + cookies_dict = get_cookies(flow) + resp = flow.response.get_text(strict=False) # Example: http://xss.guru/unclaimedScriptTag.html - find_unclaimed_URLs(flow.response.content, flow.request.url) - results = test_end_of_URL_injection(flow.response.content.decode('utf-8'), flow.request.url, cookiesDict) + find_unclaimed_URLs(resp, flow.request.url) + results = test_end_of_URL_injection(resp, flow.request.url, cookies_dict) log_XSS_data(results[0]) log_SQLi_data(results[1]) # Example: https://daviddworken.com/vulnerableReferer.php - results = test_referer_injection(flow.response.content.decode('utf-8'), flow.request.url, cookiesDict) + results = test_referer_injection(resp, flow.request.url, cookies_dict) log_XSS_data(results[0]) log_SQLi_data(results[1]) # Example: https://daviddworken.com/vulnerableUA.php - results = test_user_agent_injection(flow.response.content.decode('utf-8'), flow.request.url, cookiesDict) + results = test_user_agent_injection(resp, flow.request.url, cookies_dict) log_XSS_data(results[0]) log_SQLi_data(results[1]) if "?" in flow.request.url: # Example: https://daviddworken.com/vulnerable.php?name= - results = test_query_injection(flow.response.content.decode('utf-8'), flow.request.url, cookiesDict) + results = test_query_injection(resp, flow.request.url, cookies_dict) log_XSS_data(results[0]) log_SQLi_data(results[1]) |