From 1dc5658f740e051b10197d51f81ee78563b22908 Mon Sep 17 00:00:00 2001 From: retoor Date: Sun, 29 Dec 2024 21:17:45 +0100 Subject: [PATCH] Added reveiw. --- .gitignore | 3 +- .reviews/src/zamenyat/__main__.py.json | 11 ++++++ .reviews/src/zamenyat/__main__.py.md | 25 +++++++++++++ .reviews/src/zamenyat/app.py.json | 11 ++++++ .reviews/src/zamenyat/app.py.md | 28 +++++++++++++++ .reviews/src/zamenyat/app2.py.json | 11 ++++++ .reviews/src/zamenyat/app2.py.md | 24 +++++++++++++ README.md | 49 +++++++++++++++++++++++++ review.md | 50 ++++++++++++++++++++++++++ 9 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 .reviews/src/zamenyat/__main__.py.json create mode 100644 .reviews/src/zamenyat/__main__.py.md create mode 100644 .reviews/src/zamenyat/app.py.json create mode 100644 .reviews/src/zamenyat/app.py.md create mode 100644 .reviews/src/zamenyat/app2.py.json create mode 100644 .reviews/src/zamenyat/app2.py.md create mode 100644 review.md diff --git a/.gitignore b/.gitignore index 4500ec5..edef6ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ # ---> Python .history .vscode - +.backup* +app2.py .replace.json # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.reviews/src/zamenyat/__main__.py.json b/.reviews/src/zamenyat/__main__.py.json new file mode 100644 index 0000000..8d18178 --- /dev/null +++ b/.reviews/src/zamenyat/__main__.py.json @@ -0,0 +1,11 @@ +{ + "extension": ".py", + "source": "# Written by retoor@molodetz.nl\n\n# This script sets up a command-line interface to run a server that replaces sensitive content using the Application from the zamenyat.app module. 
\n\n# Imports: The script imports argparse and Application from the zamenyat.app module.\n\n# MIT License\n# \n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n# \n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n# \n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\nimport argparse\nfrom zamenyat.app import Application\n\nparser = argparse.ArgumentParser(description=\"Zamenyat sensitive content replacer.\")\nparser.add_argument(\"--host\", required=True, type=str)\nparser.add_argument(\"--port\", required=True, type=int)\nparser.add_argument(\"--upstream-host\", required=True, type=str)\nparser.add_argument(\"--upstream-port\", required=True, type=int)\n\ndef main():\n args = parser.parse_args()\n app = Application(upstream_host=args.upstream_host, upstream_port=args.upstream_port)\n app.serve(host=args.host, port=args.port)\n\nif __name__ == \"__main__\":\n main()", + "review": "# 7\n\n## Bugs\n- No explicit bug was noted in the code as it is straightforward and uses standard libraries. 
However, there is a reliance on the `Application` class in `zamenyat.app` without any error handling around its usage which could lead to runtime errors if `Application` is not correctly defined or behaves unexpectedly.\n\n## Optimizations\n- Include error handling around the `Application` instantiation and connection to handle potential issues cleanly.\n- Validate user inputs (e.g., using regular expressions to verify hostnames and port ranges) before passing them to the `Application`.\n- Add logging to aid in debugging and monitoring the application's behavior and actions.\n- Consider providing default host and port values, or a configuration file to simplify the command-line requirements for common use cases.\n\n## Good points\n- The script is simple and easy to understand, making efficient use of Python's argparse module to handle command line arguments.\n- The structure is clean and modular with a well-defined entry point via the `main()` function.\n- Adheres to best practices by checking for `__name__ == \"__main__\"` before executing the main routine, enabling easier import and testing.\n- Clear comments and a concise description at the beginning help clarify the script's purpose.\n\n## Summary\nOverall, the source code is clean, adheres to good coding standards, and is well-documented. The main focus is on initiating a service through an unspecified `Application` object that listens on a specified host and port, forwarding requests to an upstream host and port. 
While it's concise and effective for its intended functionality, it would benefit from additional robustness via error handling, input validation, and possibly default settings for ease of use.\n\n## Open source alternatives\n- **Nginx and Apache HTTP Server**: Often used for proxying functionality with various plugins for content filtering.\n- **Squid Proxy**: Another proxy tool that can be configured for content filtering and traffic management.\n- **ModSecurity**: Works with web servers and can filter traffic for sensitive content replacement or blocking.\n- **Privoxy**: A proxy server that can filter content and manage privacy-enhancing jobs.\n", + "filename": "__main__.py", + "path": "src/zamenyat/__main__.py", + "directory": "zamenyat", + "grade": 7, + "size": 1938, + "line_count": 42 +} \ No newline at end of file diff --git a/.reviews/src/zamenyat/__main__.py.md b/.reviews/src/zamenyat/__main__.py.md new file mode 100644 index 0000000..7d013e3 --- /dev/null +++ b/.reviews/src/zamenyat/__main__.py.md @@ -0,0 +1,25 @@ +# 7 + +## Bugs +- No explicit bug was noted in the code as it is straightforward and uses standard libraries. However, there is a reliance on the `Application` class in `zamenyat.app` without any error handling around its usage which could lead to runtime errors if `Application` is not correctly defined or behaves unexpectedly. + +## Optimizations +- Include error handling around the `Application` instantiation and connection to handle potential issues cleanly. +- Validate user inputs (e.g., using regular expressions to verify hostnames and port ranges) before passing them to the `Application`. +- Add logging to aid in debugging and monitoring the application's behavior and actions. +- Consider providing default host and port values, or a configuration file to simplify the command-line requirements for common use cases. 
+ +## Good points +- The script is simple and easy to understand, making efficient use of Python's argparse module to handle command line arguments. +- The structure is clean and modular with a well-defined entry point via the `main()` function. +- Adheres to best practices by checking for `__name__ == "__main__"` before executing the main routine, enabling easier import and testing. +- Clear comments and a concise description at the beginning help clarify the script's purpose. + +## Summary +Overall, the source code is clean, adheres to good coding standards, and is well-documented. The main focus is on initiating a service through an unspecified `Application` object that listens on a specified host and port, forwarding requests to an upstream host and port. While it's concise and effective for its intended functionality, it would benefit from additional robustness via error handling, input validation, and possibly default settings for ease of use. + +## Open source alternatives +- **Nginx and Apache HTTP Server**: Often used for proxying functionality with various plugins for content filtering. +- **Squid Proxy**: Another proxy tool that can be configured for content filtering and traffic management. +- **ModSecurity**: Works with web servers and can filter traffic for sensitive content replacement or blocking. +- **Privoxy**: A proxy server that can filter content and manage privacy-enhancing jobs. diff --git a/.reviews/src/zamenyat/app.py.json b/.reviews/src/zamenyat/app.py.json new file mode 100644 index 0000000..5bf6b43 --- /dev/null +++ b/.reviews/src/zamenyat/app.py.json @@ -0,0 +1,11 @@ +{ + "extension": ".py", + "source": "# Written by retoor@molodetz.nl\n\n# This script is a network proxy that intercepts HTTP communication, replaces specific content in headers and data, and passes communication between clients and an upstream server. 
It supports chunked transfer encoding, keep-alive connections, and forking for client connections.\n\n# Import summary: Uses socket, asyncio, pathlib, os, signal, and json modules from Python standard library.\n\n# MIT License\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n\nimport socket\nimport asyncio\nimport pathlib\nimport os\nimport signal\nimport json\nfrom concurrent.futures import ThreadPoolExecutor as Executor\n\ndef set_header_value(headers, key, value):\n value = str(value)\n headers = headers.decode(\"utf-8\").rstrip(\"\\r\\n\\r\\n\")\n parts = headers.split(key + \": \")\n if len(parts) > 1:\n headers = headers.replace(parts[1].split(\"\\r\\n\")[0], value)\n else:\n headers = headers + key + \": \" + value + \"\\r\\n\"\n return (headers + \"\\r\\n\").encode()\n\ndef get_header_value(headers, key):\n headers = headers.decode(\"utf-8\")\n try:\n parts = headers.split(key + \": \")\n return parts[1].split(\"\\r\\n\")[0]\n except:\n return None \n\ndef get_content_length(headers):\n try:\n return int(get_header_value(headers, \"Content-Length\"))\n except:\n return 0\n\ndef send_all(sock, data):\n while data:\n sent = sock.send(data)\n data = data[sent:]\n\nclass HTTPDocument:\n def __init__(self, headers, data):\n self.headers = headers\n self.data = data\n self.original_content_length = get_content_length(headers)\n self.content_length = self.original_content_length\n\n def replace(self, old, new):\n print(\"Replaced\")\n self.data = self.data.replace(old.encode(), new.encode())\n if self.original_content_length != len(self.data):\n self.headers = set_header_value(self.headers, \"Content-Length\", len(self.data))\n return True\n return False\n\n @property\n def content(self):\n return self.data\n\nclass Protocol:\n def __init__(self, document, downstream, upstream):\n self.upstream = upstream\n self.downstream = downstream\n self.document = document\n self.bytes_sent = 0\n \n def stream(self):\n while self.bytes_sent != self.document.content_length:\n chunk = 
self.downstream.recv(1)\n self.bytes_sent += 1\n self.upstream.sendall(chunk)\n\ndef read_until(sock, delim):\n data = b\"\"\n try:\n while True:\n d = sock.recv(1)\n data += d\n if data.endswith(delim):\n return data\n except Exception as ex:\n print(ex)\n return None\n\ndef communicate(sock, config):\n upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n upstream.connect((\"127.0.0.1\", 8082))\n while True:\n headers = read_until(sock, b\"\\r\\n\\r\\n\")\n print(headers)\n if not headers:\n sock.close()\n break\n\n upstream.sendall(headers)\n content_length = get_content_length(headers)\n if content_length:\n doc = HTTPDocument(headers, b'')\n protocol = Protocol(doc, sock, upstream) \n protocol.stream()\n\n headers = read_until(upstream, b\"\\r\\n\\r\\n\")\n for key, value in config.get(\"upstream\", {}).items():\n headers = headers.replace(key.encode(), value.encode())\n print(headers)\n sock.sendall(headers)\n content_length = get_content_length(headers)\n handled = content_length > 0 \n if content_length:\n doc = HTTPDocument(headers, b'')\n protocol = Protocol(doc, upstream, sock) \n protocol.stream()\n\n if get_header_value(headers, \"Connection\") == \"close\":\n sock.close()\n return\n\n if get_header_value(headers, \"Upgrade\") == \"websocket\":\n sock.close()\n return\n\n if get_header_value(headers, \"Transfer-Encoding\") == \"chunked\":\n while True:\n headers = read_until(upstream, b\"\\r\\n\")\n content_length = int(headers[:-2], 16)\n if not content_length:\n sock.sendall(b\"\\r\\n\")\n break\n data = b''\n while len(data) < content_length:\n chunk = upstream.recv(1)\n data += chunk\n \n for key, value in config.get(\"downstream\", {}).items():\n data = data.replace(key.encode(), value.encode())\n content_length = len(data)\n data = hex(content_length)[2:].encode() + b\"\\r\\n\" + data\n data += upstream.recv(2)\n sock.sendall(data)\n if not data:\n break\n \n print(data)\n upstream.close()\n sock.close()\n break\n \n if not handled:\n 
upstream.close()\n sock.close()\n break\n \n if get_header_value(headers, \"Connection\") == \"keep-alive\":\n continue\n break\n\ndef reap_zombie_processes():\n while True:\n try:\n pid, _ = os.waitpid(-1, os.WNOHANG)\n if pid == 0:\n break\n except ChildProcessError:\n break\n\nasync def serve(host, port, config):\n config = json.loads(pathlib.Path(config).read_text())\n signal.signal(signal.SIGCHLD, lambda signum, frame: reap_zombie_processes())\n \n server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n executor = Executor(100)\n server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)\n server.bind((host, port))\n server.listen()\n clients = []\n while True:\n client, address = server.accept()\n clients.append(client)\n print(f\"Connection from {address}\")\n pid = os.fork() \n if pid == 0:\n server.close()\n communicate(client, config)\n os._exit(0)\n else:\n client.close()\n\nasyncio.run(serve(\"0.0.0.0\", 3046, \".replace.json\"))", + "review": "**Grade: 5**\n\n## Bugs\n- The script does not handle errors gracefully during socket communication, leading to potential unhandled exceptions and network problems.\n- The `read_until` function may cause the program to hang indefinitely if the data received never includes the delimiter. 
\n- The `communicate` function lacks proper exception handling mechanisms for network communication.\n- Unhandled exceptions in networking functions can cause the program to crash unexpectedly.\n\n## Optimizations\n- Implement asynchronous I/O operations to improve the efficiency of handling multiple connections.\n- Utilize structured logging instead of plain `print` statements for better traceability and log management.\n- Use `selectors` or `asyncio` for handling multiple connections more efficiently instead of manually forking processes.\n- Optimize string manipulation operations when handling headers to prevent performance impacts.\n- Implement a connection pool for reusing connections to the upstream server if it suits the use case.\n- Consider using `asyncio`'s `start_server` instead of manual socket handling for a more robust solution on handling connections asynchronously.\n\n## Good points\n- The code structures the core functionality such as HTTP data parsing and modifications into separate, well-defined functions.\n- Use of `dataclass` constructs (or similar) to manage data like `HTTPDocument` could improve clarity, though the current class-based approach lays a good foundation.\n- Creative use of socket programming to implement a proxy system, with a clear intent of design from the author's comments.\n\n## Summary\nThe script is a basic implementation of a network proxy capable of intercepting and modifying HTTP communications. While it captures the base logic reasonably well, the code would benefit from improved error handling, optimizations for handling multiple connections efficiently, and better resource management. Logging could be enhanced, and the adoption of `asyncio` patterns would provide a cleaner and more scalable solution for managing concurrent connections. 
This would help mitigate the potential performance and reliability issues identified.\n\n## Open source alternatives\n- **mitmproxy**: An interactive, SSL/TLS-capable intercepting proxy with a console interface.\n- **Squid**: Originally designed for caching web proxies but can be configured to perform similar roles.\n- **Tinyproxy**: A lightweight and fast HTTP/HTTPS proxy daemon ideal for environments where resource usage is critical.", + "filename": "app.py", + "path": "src/zamenyat/app.py", + "directory": "zamenyat", + "grade": 5, + "size": 7133, + "line_count": 197 +} \ No newline at end of file diff --git a/.reviews/src/zamenyat/app.py.md b/.reviews/src/zamenyat/app.py.md new file mode 100644 index 0000000..3708ad7 --- /dev/null +++ b/.reviews/src/zamenyat/app.py.md @@ -0,0 +1,28 @@ +**Grade: 5** + +## Bugs +- The script does not handle errors gracefully during socket communication, leading to potential unhandled exceptions and network problems. +- The `read_until` function may cause the program to hang indefinitely if the data received never includes the delimiter. +- The `communicate` function lacks proper exception handling mechanisms for network communication. +- Unhandled exceptions in networking functions can cause the program to crash unexpectedly. + +## Optimizations +- Implement asynchronous I/O operations to improve the efficiency of handling multiple connections. +- Utilize structured logging instead of plain `print` statements for better traceability and log management. +- Use `selectors` or `asyncio` for handling multiple connections more efficiently instead of manually forking processes. +- Optimize string manipulation operations when handling headers to prevent performance impacts. +- Implement a connection pool for reusing connections to the upstream server if it suits the use case. +- Consider using `asyncio`'s `start_server` instead of manual socket handling for a more robust solution on handling connections asynchronously. 
+ +## Good points +- The code structures the core functionality such as HTTP data parsing and modifications into separate, well-defined functions. +- Use of `dataclass` constructs (or similar) to manage data like `HTTPDocument` could improve clarity, though the current class-based approach lays a good foundation. +- Creative use of socket programming to implement a proxy system, with a clear intent of design from the author's comments. + +## Summary +The script is a basic implementation of a network proxy capable of intercepting and modifying HTTP communications. While it captures the base logic reasonably well, the code would benefit from improved error handling, optimizations for handling multiple connections efficiently, and better resource management. Logging could be enhanced, and the adoption of `asyncio` patterns would provide a cleaner and more scalable solution for managing concurrent connections. This would help mitigate the potential performance and reliability issues identified. + +## Open source alternatives +- **mitmproxy**: An interactive, SSL/TLS-capable intercepting proxy with a console interface. +- **Squid**: Originally designed for caching web proxies but can be configured to perform similar roles. +- **Tinyproxy**: A lightweight and fast HTTP/HTTPS proxy daemon ideal for environments where resource usage is critical. 
\ No newline at end of file diff --git a/.reviews/src/zamenyat/app2.py.json b/.reviews/src/zamenyat/app2.py.json new file mode 100644 index 0000000..3cd59b7 --- /dev/null +++ b/.reviews/src/zamenyat/app2.py.json @@ -0,0 +1,11 @@ +{ + "extension": ".py", + "source": "# Written by retoor@molodetz.nl\n\n# This code defines an application for handling asynchronous socket connections with capability for reading and writing chunks of data, capable of handling HTTP requests, chunked transfer encoding, and WebSocket upgrades.\n\n# Imports: from app.app import get_timestamp\n\n# MIT License\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\n\nimport asyncio\nimport time\nfrom concurrent.futures import ThreadPoolExecutor as Executor\n\nfrom app.app import get_timestamp\n\nZAMENYAT_BACKLOG = 100\nZAMENYAT_THREAD_COUNT = 2500\nZAMENYAT_BUFFER_SIZE = 4096\nZAMENYAT_HEADER_MAX_LENGTH = 4096 * 2\n\nclass AsyncWriter:\n def __init__(self, writer, buffer_size=ZAMENYAT_BUFFER_SIZE, debug=False):\n self.debug = debug\n self.writer = writer\n self.buffer_size = buffer_size\n self.drain = self.writer.drain\n self.close = self.writer.close\n self.wait_closed = self.writer.wait_closed\n\n async def write(self, data):\n while data:\n chunk_size = self.buffer_size if len(data) > self.buffer_size else len(data)\n chunk = data[:chunk_size]\n self.writer.write(chunk)\n await self.writer.drain()\n if self.debug:\n print(\"Write chunk:\", chunk)\n data = data[chunk_size:]\n await self.writer.drain()\n\nclass AsyncReader:\n def __init__(self, reader, debug=False):\n self.reader = reader\n self.buffer = b\"\"\n self.debug = debug\n\n async def read_until(self, to_match):\n buffer = b''\n while to_match not in buffer:\n chunk = await self.read(1)\n if not chunk:\n return None\n buffer += chunk\n match_start = buffer.find(to_match)\n data = buffer[: match_start + len(to_match)]\n await self.unread(buffer[match_start+len(to_match):])\n return data\n \n async def peek(self, buffer_size):\n data = await self.read(buffer_size)\n await self.unread(data)\n return data\n\n async def read(self, buffer_size=ZAMENYAT_BUFFER_SIZE, exact=False):\n while len(self.buffer) < buffer_size:\n chunk = await self.reader.read(1)\n if not chunk:\n return None\n if self.debug:\n print(\"Read chunk:\", chunk)\n self.buffer += chunk\n \n buffer_size = (\n len(self.buffer) if 
len(self.buffer) < buffer_size else buffer_size\n )\n data = self.buffer[:buffer_size]\n self.buffer = self.buffer[buffer_size:]\n return data\n\n async def unread(self, data):\n if not data:\n return\n if hasattr(data, \"encode\"):\n data = data.encode()\n self.buffer = data + self.buffer\n\nclass Socket:\n def __init__(self, reader, writer, buffer_size, debug=False):\n self.debug = debug\n self.reader = AsyncReader(reader, debug=self.debug)\n self.writer = AsyncWriter(writer, debug=self.debug)\n self.read = self.reader.read\n self.read_until = self.reader.read_until\n self.peek = self.reader.peek\n self.unread = self.reader.unread\n self.write = self.writer.write\n self.drain = self.writer.drain\n self.close = self.writer.close\n self.wait_closed = self.writer.wait_closed\n\nclass Application:\n def __init__(self, upstream_host, upstream_port, silent=False, *args, **kwargs):\n self.upstream_host = upstream_host\n self.upstream_port = upstream_port\n self.server = None\n self.silent = silent\n self.host = None\n self.port = None\n self.executor = None\n self.buffer_size = ZAMENYAT_BUFFER_SIZE\n self.header_max_length = ZAMENYAT_HEADER_MAX_LENGTH\n self.connection_count = 0\n self.total_connection_count = 0\n super().__init__(*args, **kwargs)\n\n async def get_headers(self, reader):\n headers = await reader.reader.readuntil(b\"\\r\\n\\r\\n\")\n if not headers:\n return None, None\n headers = headers\n header_dict = {}\n req_resp, *headers = headers.split(b\"\\r\\n\")\n for header_line in headers:\n try:\n key, *value = header_line.split(b\": \")\n except ValueError:\n continue\n key = key.decode()\n value = \": \".join([value.decode() for value in value])\n header_dict[key] = int(value) if value.isdigit() else value\n return req_resp.decode(), header_dict\n\n def header_dict_to_bytes(self, req_resp, headers):\n header_list = [req_resp]\n for key, value in headers.items():\n if not key:\n continue\n header_list.append(f\"{key}: {value}\")\n result = 
(\"\\r\\n\".join(header_list)).encode()\n print(\"qqq\", result, \"qqq\")\n return result + b\"\\r\\n\\r\\n\"\n\n async def handle_chunked(self, reader, writer):\n print(\"\\n\\nCHUNKING STARTED\") \n while True:\n chunk_size_bytes = (await reader.reader.readuntil(b\"\\r\\n\"))\n await writer.write(chunk_size_bytes)\n chunk_size = int(chunk_size_bytes[:-2], 16)\n if chunk_size == 0:\n data = await reader.reader.readuntil(b\"\\r\\n\")\n await writer.write(data)\n print(\"CHUNKING ENDED\")\n break\n chunk = await reader.reader.readexactly(chunk_size + 2)\n await writer.write(chunk)\n \n async def write_in_chunks(self, writer: asyncio.StreamWriter, data: bytes, chunk_size: int = 1024):\n for i in range(0, len(data), chunk_size):\n chunk = data[i:i + chunk_size]\n writer.write(chunk)\n await writer.drain()\n print(f\"Sent chunk: {chunk.decode()}\") \n\n async def handle_basic_request(self, reader, writer):\n try:\n req_resp, headers = None, None\n data = b\"\"\n print(\"BEFORE READ\")\n req_resp, headers = await self.get_headers(reader)\n print(\"AFTER READ\")\n if not headers:\n print(\"GGG\")\n return None\n if \"Content-Length\" in headers and headers['Content-Length']:\n data = await reader.reader.readexactly(headers['Content-Length'])\n await self.write_in_chunks(writer.writer, self.header_dict_to_bytes(req_resp, headers) + (data or b\"\"), 1024)\n print(\"HEADERS SENT\")\n except asyncio.CancelledError:\n pass\n finally:\n pass\n return headers\n\n async def stream_websocket(self, reader, writer):\n while True:\n chunk = await reader.read(1)\n if not chunk:\n await writer.close()\n await writer.wait_closed()\n break\n await writer.write(chunk)\n await writer.drain()\n\n async def handle_client(self, reader, writer):\n self.connection_count += 1\n self.total_connection_count += 1\n connection_nr = self.total_connection_count\n upstream_reader, upstream_writer = await asyncio.open_connection(\n self.upstream_host, self.upstream_port\n )\n reader = 
AsyncReader(reader)\n writer = AsyncWriter(writer)\n upstream_reader = AsyncReader(upstream_reader)\n upstream_writer = AsyncWriter(upstream_writer)\n\n is_websocket = False\n keep_alive = False\n time_start = time.time()\n if not self.silent:\n print(\n f\"Connected to upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Time: {get_timestamp()}\"\n )\n async with asyncio.Semaphore(1) as s:\n request_headers = await self.handle_basic_request(\n reader, upstream_writer\n )\n \n response_headers = await self.handle_basic_request(\n upstream_reader, writer\n )\n if request_headers.get('Connection') == 'close':\n print(\"JAAA\")\n request_headers = None\n if response_headers.get('Connection') == 'close':\n print(\"JAAA\")\n request_headers = None\n response_headers = None\n\n if request_headers and response_headers and response_headers.get('Transfer-Encoding') == 'chunked':\n await self.handle_chunked(upstream_reader, writer)\n else:\n if request_headers and request_headers.get(\"Upgrade\") == \"websocket\":\n print(\"WEBSOCKET\")\n await asyncio.gather(\n self.stream_websocket(reader, upstream_writer),\n self.stream_websocket(upstream_reader, writer),\n )\n else:\n while request_headers and request_headers.get(\"Connection\") == \"keep-alive\":\n request_headers = await self.handle_basic_request(upstream_reader, writer)\n response_headers = await self.handle_basic_request(upstream_reader, writer)\n if response_headers['Connection'] == 'close':\n break\n\n time_end = time.time()\n time_duration = time_end - time_start\n if not self.silent:\n print(\n f\"Disconnected upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Duration: {time_duration:.5f}s\"\n )\n\n self.connection_count -= 1\n writer.close()\n upstream_writer.close()\n await writer.wait_closed()\n await upstream_writer.wait_closed()\n\n def upgrade_executor(self, thread_count):\n self.executor = 
Executor(max_workers=thread_count)\n loop = asyncio.get_running_loop()\n loop.set_default_executor(self.executor)\n return self.executor\n\n async def serve_async(self, host, port, backlog=ZAMENYAT_BACKLOG):\n self.upgrade_executor(ZAMENYAT_THREAD_COUNT)\n self.host = host\n self.port = port\n self.server = await asyncio.start_server(\n self.handle_client, self.host, self.port, backlog=backlog\n )\n async with self.server:\n await self.server.serve_forever()\n\n def serve(self, host, port, backlog=ZAMENYAT_BACKLOG):\n try:\n asyncio.run(self.serve_async(host, port, backlog=backlog))\n except KeyboardInterrupt:\n print(\"Shut down server\")", + "review": "# 6\n\n## Bugs\n- The `get_headers` function may unexpectedly return `None` for both `req_resp` and `header_dict` upon failure to read headers; this can cause issues in subsequent logic that expects these to be valid.\n- In `handle_client`, the `request_headers` variable may remain `None`, causing potential processing logic errors.\n- Potential infinite loop in `handle_client` if neither `Connection: close` nor `keep-alive` conditions terminate loops.\n\n## Optimizations\n- Consider using built-in or third-party libraries for WebSocket management, like `websockets`, to reduce code complexity and improve reliability.\n- Use more Pythonic and efficient string and bytes handling, rather than manual string manipulations.\n- Use `async with` for cleaner resource management, such as the handling of streams and connections.\n- Simplify the `handle_basic_request` function by breaking it into smaller, more specific, and testable parts.\n- Reduce the high number of threads (2500), unless explicitly needed, as it may cause resource exhaustion or performance penalties.\n \n## Good points\n- The use of asyncio provides non-blocking I/O, which is suitable for handling multiple socket connections asynchronously.\n- The modular structure and separation of concerns are commendable, as seen in the `Socket`, `AsyncReader`, and 
`AsyncWriter` classes.\n\n## Summary\nThis code implements a non-blocking socket server with capabilities like handling HTTP requests and WebSocket connections. It uses asyncio for asynchronous operations, which is appropriate for handling multiple network connections efficiently. While it demonstrates a good understanding of asynchronous programming with clear class design for reading and writing, there are areas necessitating more robust error handling and optimization for better performance and code maintainability.\n\n## Open source alternatives\n- **aiohttp**: A popular asynchronous HTTP client/server framework that includes WebSocket support.\n- **websockets**: A Python library for building WebSocket servers and clients with asyncio support.", + "filename": "app2.py", + "path": "src/zamenyat/app2.py", + "directory": "zamenyat", + "grade": 6, + "size": 11488, + "line_count": 296 +} \ No newline at end of file diff --git a/.reviews/src/zamenyat/app2.py.md b/.reviews/src/zamenyat/app2.py.md new file mode 100644 index 0000000..4be48e8 --- /dev/null +++ b/.reviews/src/zamenyat/app2.py.md @@ -0,0 +1,24 @@ +# 6 + +## Bugs +- The `get_headers` function may unexpectedly return `None` for both `req_resp` and `header_dict` upon failure to read headers; this can cause issues in subsequent logic that expects these to be valid. +- In `handle_client`, the `request_headers` variable may remain `None`, causing potential processing logic errors. +- Potential infinite loop in `handle_client` if neither `Connection: close` nor `keep-alive` conditions terminate loops. + +## Optimizations +- Consider using built-in or third-party libraries for WebSocket management, like `websockets`, to reduce code complexity and improve reliability. +- Use more Pythonic and efficient string and bytes handling, rather than manual string manipulations. +- Use `async with` for cleaner resource management, such as the handling of streams and connections. 
+- Simplify the `handle_basic_request` function by breaking it into smaller, more specific, and testable parts. +- Reduce the high number of threads (2500), unless explicitly needed, as it may cause resource exhaustion or performance penalties. + +## Good points +- The use of asyncio provides non-blocking I/O, which is suitable for handling multiple socket connections asynchronously. +- The modular structure and separation of concerns are commendable, as seen in the `Socket`, `AsyncReader`, and `AsyncWriter` classes. + +## Summary +This code implements a non-blocking socket server with capabilities like handling HTTP requests and WebSocket connections. It uses asyncio for asynchronous operations, which is appropriate for handling multiple network connections efficiently. While it demonstrates a good understanding of asynchronous programming with clear class design for reading and writing, there are areas necessitating more robust error handling and optimization for better performance and code maintainability. + +## Open source alternatives +- **aiohttp**: A popular asynchronous HTTP client/server framework that includes WebSocket support. +- **websockets**: A Python library for building WebSocket servers and clients with asyncio support. \ No newline at end of file diff --git a/README.md b/README.md index 34cb286..2917279 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,52 @@ HTTP bridge configurable to replace the content you want to see replaced. This can be used to have a real version and anonymous version for a website for example like I did. This site exists in the retoor version and under my real name for example. +# Review Summary of Zamenyat Project + +## Project Overview +The Zamenyat project includes multiple Python scripts aimed at handling network communication through proxy and server functionalities. 
The scripts offer capabilities like sensitive content replacement, handling of asynchronous socket connections, and HTTP header manipulation, among others. + +## Code Reviews + +### Script 1: `src/zamenyat/__main__.py` + +- **Functionality**: Sets up a command-line interface to run a server using the `zamenyat.app` module. +- **Bugs**: No explicit bugs noted; relies on the `Application` class without error handling. +- **Optimizations**: + - Add error handling around `Application`. + - Validate user inputs. + - Add logging. + - Provide default host and port values. +- **Good Points**: Clean, modular code with proper use of argparse and entry-point checks. +- **Grade**: 7/10 + +### Script 2: `src/zamenyat/app2.py` + +- **Functionality**: Handles asynchronous socket connections, HTTP requests, chunked encoding, and WebSocket upgrades. +- **Bugs**: Potential undefined variables and logic errors concerning header handling. +- **Optimizations**: + - Use libraries like `websockets` for WebSocket management. + - Use `async with` for resource management. + - Simplify function structures. +- **Good Points**: Good use of asyncio for non-blocking I/O and clear module separation. +- **Grade**: 6/10 + +### Script 3: `src/zamenyat/app.py` + +- **Functionality**: A network proxy intercepting HTTP communication, supporting chunked transfer and content replacement. +- **Bugs**: Lacks error handling in socket communication; potential hang in read operation. +- **Optimizations**: + - Implement asynchronous I/O for better efficiency. + - Use structured logging. + - Optimize string manipulations. + - Utilize `asyncio` for robust connection handling. +- **Good Points**: Organized core functionality with structured methods for HTTP processing. +- **Grade**: 5/10 + +## Overall Evaluation +- **Grade**: 6.0/10 +- **General Optimizations**: + - Enhance error handling and input validation. + - Integrate more modern async patterns for improved network handling.
+ - Include logging and configuration simplifications for ease of use. +- **Alternatives**: Consider using frameworks or libraries like Nginx, aiohttp, and mitmproxy for similar capabilities. diff --git a/review.md b/review.md new file mode 100644 index 0000000..e71bab1 --- /dev/null +++ b/review.md @@ -0,0 +1,50 @@ +markdown +# Review Summary of Zamenyat Project + +## Project Overview +The Zamenyat project includes multiple Python scripts aimed at handling network communication through proxy and server functionalities. The scripts offer capabilities like sensitive content replacement, handling of asynchronous socket connections, and HTTP header manipulation, among others. + +## Code Reviews + +### Script 1: `src/zamenyat/__main__.py` + +- **Functionality**: Sets up a command-line interface to run a server using the `zamenyat.app` module. +- **Bugs**: No explicit bugs noted; relies on the `Application` class without error handling. +- **Optimizations**: + - Add error handling around `Application`. + - Validate user inputs. + - Add logging. + - Provide default host and port values. +- **Good Points**: Clean, modular code with proper use of argparse and entry-point checks. +- **Grade**: 7/10 + +### Script 2: `src/zamenyat/app2.py` + +- **Functionality**: Handles asynchronous socket connections, HTTP requests, chunked encoding, and WebSocket upgrades. +- **Bugs**: Potential undefined variables and logic errors concerning header handling. +- **Optimizations**: + - Use libraries like `websockets` for WebSocket management. + - Use `async with` for resource management. + - Simplify function structures. +- **Good Points**: Good use of asyncio for non-blocking I/O and clear module separation. +- **Grade**: 6/10 + +### Script 3: `src/zamenyat/app.py` + +- **Functionality**: A network proxy intercepting HTTP communication, supporting chunked transfer and content replacement. +- **Bugs**: Lacks error handling in socket communication; potential hang in read operation.
+- **Optimizations**: + - Implement asynchronous I/O for better efficiency. + - Use structured logging. + - Optimize string manipulations. + - Utilize `asyncio` for robust connection handling. +- **Good Points**: Organized core functionality with structured methods for HTTP processing. +- **Grade**: 5/10 + +## Overall Evaluation +- **Grade**: 6.0/10 +- **General Optimizations**: + - Enhance error handling and input validation. + - Integrate more modern async patterns for improved network handling. + - Include logging and configuration simplifications for ease of use. +- **Alternatives**: Consider using frameworks or libraries like Nginx, aiohttp, and mitmproxy for similar capabilities.