diff --git a/.gitignore b/.gitignore index 5d381cc..4500ec5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,8 @@ # ---> Python +.history +.vscode + +.replace.json # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/Makefile b/Makefile index 1504229..681e36b 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,9 @@ build: $(PYTHON) -m build run: - $(BIN)$(APP_NAME) --host="0.0.0.0" --port=3046 --upstream-host="127.0.0.1" --upstream-port=9999 + $(BIN)$(APP_NAME) --host="0.0.0.0" --port=3046 --upstream-host="localhost" --upstream-port=8082 +run2: + $(BIN)$(APP_NAME) --host="0.0.0.0" --port=3046 --upstream-host="localhost" --upstream-port=9999 + diff --git a/src/zamenyat/__main__.py b/src/zamenyat/__main__.py index 235306d..8b557b3 100644 --- a/src/zamenyat/__main__.py +++ b/src/zamenyat/__main__.py @@ -1,22 +1,42 @@ -import argparse +# Written by retoor@molodetz.nl +# This script sets up a command-line interface to run a server that replaces sensitive content using the Application from the zamenyat.app module. + +# Imports: The script imports argparse from the standard library and Application from the zamenyat.app module. + +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import argparse from zamenyat.app import Application parser = argparse.ArgumentParser(description="Zamenyat sensitive content replacer.") - parser.add_argument("--host", required=True, type=str) parser.add_argument("--port", required=True, type=int) parser.add_argument("--upstream-host", required=True, type=str) parser.add_argument("--upstream-port", required=True, type=int) - def main(): args = parser.parse_args() - app = Application( - upstream_host=args.upstream_host, upstream_port=args.upstream_port - ) + app = Application(upstream_host=args.upstream_host, upstream_port=args.upstream_port) app.serve(host=args.host, port=args.port) - if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/src/zamenyat/app.py b/src/zamenyat/app.py index 0bd5126..37c6561 100644 --- a/src/zamenyat/app.py +++ b/src/zamenyat/app.py @@ -1,258 +1,197 @@ +# Written by retoor@molodetz.nl + +# This script is a network proxy that intercepts HTTP communication, replaces specific content in headers and data, and passes communication between clients and an upstream server. It supports chunked transfer encoding, keep-alive connections, and forking for client connections. + +# Import summary: Uses the socket, asyncio, pathlib, os, signal, json, and concurrent.futures modules from the Python standard library. 
+ +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import socket import asyncio -import time +import pathlib +import os +import signal +import json from concurrent.futures import ThreadPoolExecutor as Executor -from app.app import get_timestamp +def set_header_value(headers, key, value): + value = str(value) + headers = headers.decode("utf-8").rstrip("\r\n\r\n") + parts = headers.split(key + ": ") + if len(parts) > 1: + headers = headers.replace(parts[1].split("\r\n")[0], value) + else: + headers = headers + key + ": " + value + "\r\n" + return (headers + "\r\n").encode() -ZAMENYAT_BACKLOG = 100 -ZAMENYAT_THREAD_COUNT = 2500 -ZAMENYAT_BUFFER_SIZE = 4096 -ZAMENYAT_HEADER_MAX_LENGTH = 4096 * 2 +def get_header_value(headers, key): + headers = headers.decode("utf-8") + try: + parts = headers.split(key + ": ") + return parts[1].split("\r\n")[0] + except: + return None +def get_content_length(headers): + try: + return int(get_header_value(headers, "Content-Length")) + except: + return 0 -class AsyncWriter: +def send_all(sock, data): + while data: + sent = sock.send(data) + data = data[sent:] - def __init__(self, writer, buffer_size=ZAMENYAT_BUFFER_SIZE, debug=False): - self.debug = debug - self.writer = writer - self.buffer_size = buffer_size - self.drain = self.writer.drain - self.close = self.writer.close - self.wait_closed = self.writer.wait_closed +class HTTPDocument: + def __init__(self, headers, data): + self.headers = headers + self.data = data + self.original_content_length = get_content_length(headers) + self.content_length = self.original_content_length - async def write(self, data): + def replace(self, old, new): + print("Replaced") + self.data = self.data.replace(old.encode(), new.encode()) + if self.original_content_length != len(self.data): + self.headers = set_header_value(self.headers, "Content-Length", len(self.data)) + return True + return False - while data: - chunk_size = self.buffer_size if len(data) > self.buffer_size else len(data) - chunk = data[:chunk_size] - self.writer.write(chunk) - if 
self.debug: - print("Write chunk:", chunk) - data = data[chunk_size:] - await self.writer.drain() + @property + def content(self): + return self.data +class Protocol: + def __init__(self, document, downstream, upstream): + self.upstream = upstream + self.downstream = downstream + self.document = document + self.bytes_sent = 0 + + def stream(self): + while self.bytes_sent != self.document.content_length: + chunk = self.downstream.recv(1) + self.bytes_sent += 1 + self.upstream.sendall(chunk) -class AsyncReader: - - def __init__(self, reader, debug=False): - self.reader = reader - self.buffer = b"" - self.debug = debug - - async def read_until(self, to_match): - buffer = b"" - while to_match not in buffer: - chunk = await self.read() - if not chunk: - return None - buffer += chunk - match_start = buffer.find(to_match) - data = buffer[: match_start + len(to_match)] - await self.unread(buffer[match_start + len(to_match) :]) - return data - - async def read(self, buffer_size=ZAMENYAT_BUFFER_SIZE, exact=False): - buffer_size - len(self.buffer) - while len(self.buffer) < buffer_size: - chunk_size = buffer_size - len(self.buffer) - chunk = await self.reader.read(chunk_size) - if not chunk: - return None - - if self.debug: - print("Read chunk:", chunk) - self.buffer += chunk - if not exact: - break - buffer_size = ( - len(self.buffer) if len(self.buffer) < buffer_size else buffer_size - ) - data = self.buffer[:buffer_size] - self.buffer = self.buffer[buffer_size:] - - return data - - async def unread(self, data): - if not data: - return - if hasattr(data, "encode"): - data = data.encode() - self.buffer = data + self.buffer - - -class Socket: - - def __init__(self, reader, writer, buffer_size, debug=False): - self.debug = debug - self.reader = AsyncReader(reader, debug=self.debug) - self.writer = AsyncWriter(writer, debug=self.debug) - self.read = self.reader.read - self.read_until = self.reader.read_until - self.unread = self.reader.unread - self.write = self.writer.write - 
self.drain = self.writer.drain - self.close = self.writer.close - self.wait_closed = self.writer.wait_closed - - -class Application: - - def __init__(self, upstream_host, upstream_port, silent=False, *args, **kwargs): - self.upstream_host = upstream_host - self.upstream_port = upstream_port - self.server = None - self.silent = silent - self.host = None - self.port = None - self.executor = None - self.buffer_size = ZAMENYAT_BUFFER_SIZE - self.header_max_length = ZAMENYAT_HEADER_MAX_LENGTH - self.connection_count = 0 - self.total_connection_count = 0 - super().__init__(*args, **kwargs) - - async def get_headers(self, reader): - headers = await reader.read_until(b"\r\n\r\n") - if not headers: - return None, None - headers = headers[:-2] - header_dict = {} - req_resp, *headers = headers.split(b"\r\n") - for header_line in headers[:-1]: - key, *value = header_line.split(b": ") - key = key.decode() - value = ": ".join([value.decode() for value in value]) - header_dict[key] = int(value) if value.isdigit() else value - return req_resp.decode(), header_dict - - def header_dict_to_bytes(self, req_resp, headers): - header_list = [req_resp] - for key, value in headers.items(): - header_list.append(f"{key}: {value}") - header_list.append("\r\n") - return ("\r\n".join(header_list)).encode() - - async def stream(self, reader, writer, is_websocket=False): - global headers - try: - reader = Socket(reader, writer, ZAMENYAT_BUFFER_SIZE) - writer = Socket(reader, writer, ZAMENYAT_BUFFER_SIZE) - while True: - req_resp, headers = None, None - data = b"" - if not is_websocket: - req_resp, headers = await self.get_headers(reader) - if not headers: - return None - if headers: - if "Content-Length" in headers: - while len(data) != headers["Content-Length"]: - chunk_size = ( - headers["Content-Length"] - len(data) - if self.buffer_size - > headers["Content-Length"] - len(data) - else self.buffer_size - ) - chunk = await reader.read(chunk_size) - if not chunk: - data = None - return None - 
data += chunk - await writer.write(self.header_dict_to_bytes(req_resp, headers)) - await writer.drain() - if data: - await writer.write(data) - else: - data = await reader.read() - await writer.write(data) - if not is_websocket: - break - except asyncio.CancelledError: - pass - finally: - pass - return headers - # writer.close() - # await writer.wait_closed() - - async def handle_client(self, reader, writer): - self.connection_count += 1 - self.total_connection_count += 1 - connection_nr = self.total_connection_count - - upstream_reader, upstream_writer = await asyncio.open_connection( - self.upstream_host, self.upstream_port - ) - - is_websocket = False - +def read_until(sock, delim): + data = b"" + try: while True: - time_start = time.time() - if not self.silent: - print( - f"Connected to upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Time: {get_timestamp()}" - ) + d = sock.recv(1) + data += d + if data.endswith(delim): + return data + except Exception as ex: + print(ex) + return None - if is_websocket: - await asyncio.gather( - self.stream(reader, upstream_writer, is_websocket), - self.stream(upstream_reader, writer, is_websocket), - ) - if not self.silent: - print( - f"Disconnected upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Duration: {time_duration:.5f}s" - ) - break - else: - request_headers = await self.stream( - reader, upstream_writer, is_websocket - ) - await self.stream(upstream_reader, writer, is_websocket) +def communicate(sock, config): + upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + upstream.connect(("127.0.0.1", 8082)) + while True: + headers = read_until(sock, b"\r\n\r\n") + print(headers) + if not headers: + sock.close() + break - keep_alive = False - if request_headers: - if ( - request_headers.get("Connection") == "keep-alive" - ): # and not headers.get('Upgrade-Insecure-Requests'): - keep_alive = True - if 
request_headers.get("Upgrade") == "websocket": - is_websocket = True + upstream.sendall(headers) + content_length = get_content_length(headers) + if content_length: + doc = HTTPDocument(headers, b'') + protocol = Protocol(doc, sock, upstream) + protocol.stream() - time_end = time.time() - time_duration = time_end - time_start - if not self.silent: - print( - f"Disconnected upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Duration: {time_duration:.5f}s" - ) + headers = read_until(upstream, b"\r\n\r\n") + for key, value in config.get("upstream", {}).items(): + headers = headers.replace(key.encode(), value.encode()) + print(headers) + sock.sendall(headers) + content_length = get_content_length(headers) + handled = content_length > 0 + if content_length: + doc = HTTPDocument(headers, b'') + protocol = Protocol(doc, upstream, sock) + protocol.stream() - if not any([keep_alive, is_websocket]): + if get_header_value(headers, "Connection") == "close": + sock.close() + return + + if get_header_value(headers, "Upgrade") == "websocket": + sock.close() + return + + if get_header_value(headers, "Transfer-Encoding") == "chunked": + while True: + headers = read_until(upstream, b"\r\n") + content_length = int(headers[:-2], 16) + if not content_length: + sock.sendall(b"\r\n") break + data = b'' + while len(data) < content_length: + chunk = upstream.recv(1) + data += chunk + + for key, value in config.get("downstream", {}).items(): + data = data.replace(key.encode(), value.encode()) + content_length = len(data) + data = hex(content_length)[2:].encode() + b"\r\n" + data + data += upstream.recv(2) + sock.sendall(data) + if not data: + break + + print(data) + upstream.close() + sock.close() + break + + if not handled: + upstream.close() + sock.close() + break + + if get_header_value(headers, "Connection") == "keep-alive": + continue + break - self.connection_count -= 1 - - writer.close() - upstream_writer.close() - await 
writer.wait_closed() - await upstream_writer.wait_closed() - - def upgrade_executor(self, thread_count): - self.executor = Executor(max_workers=thread_count) - loop = asyncio.get_running_loop() - loop.set_default_executor(self.executor) - return self.executor - - async def serve_async(self, host, port, backlog=ZAMENYAT_BACKLOG): - self.upgrade_executor(ZAMENYAT_THREAD_COUNT) - self.host = host - self.port = port - self.server = await asyncio.start_server( - self.handle_client, self.host, self.port, backlog=backlog - ) - async with self.server: - await self.server.serve_forever() - - def serve(self, host, port, backlog=ZAMENYAT_BACKLOG): +def reap_zombie_processes(): + while True: try: - asyncio.run(self.serve_async(host, port, backlog=backlog)) - except KeyboardInterrupt: - print("Shutted down server") + pid, _ = os.waitpid(-1, os.WNOHANG) + if pid == 0: + break + except ChildProcessError: + break + +async def serve(host, port, config): + config = json.loads(pathlib.Path(config).read_text()) + signal.signal(signal.SIGCHLD, lambda signum, frame: reap_zombie_processes()) + + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + executor = Executor(100) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind((host, port)) + server.listen() + clients = [] + while True: + client, address = server.accept() + clients.append(client) + print(f"Connection from {address}") + pid = os.fork() + if pid == 0: + server.close() + communicate(client, config) + os._exit(0) + else: + client.close() + +asyncio.run(serve("0.0.0.0", 3046, ".replace.json")) \ No newline at end of file