Working version.

This commit is contained in:
retoor 2024-12-29 21:05:43 +01:00
parent 3069fc0168
commit b9c9fbb11d
4 changed files with 214 additions and 248 deletions

4
.gitignore vendored
View File

@ -1,4 +1,8 @@
# ---> Python
.history
.vscode
.replace.json
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

View File

@ -24,6 +24,9 @@ build:
$(PYTHON) -m build
run:
$(BIN)$(APP_NAME) --host="0.0.0.0" --port=3046 --upstream-host="127.0.0.1" --upstream-port=9999
$(BIN)$(APP_NAME) --host="0.0.0.0" --port=3046 --upstream-host="localhost" --upstream-port=8082
run2:
$(BIN)$(APP_NAME) --host="0.0.0.0" --port=3046 --upstream-host="localhost" --upstream-port=9999

View File

@ -1,22 +1,42 @@
import argparse
# Written by retoor@molodetz.nl
# This script sets up a command-line interface to run a server that replaces sensitive content using the Application from the zamenyat.app module.
# Imports: The script imports argparse and Application from the zamenyat.app module.
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import argparse
from zamenyat.app import Application
parser = argparse.ArgumentParser(description="Zamenyat sensitive content replacer.")
parser.add_argument("--host", required=True, type=str)
parser.add_argument("--port", required=True, type=int)
parser.add_argument("--upstream-host", required=True, type=str)
parser.add_argument("--upstream-port", required=True, type=int)
def main():
    """Parse the command-line arguments and run the proxy server.

    Builds one ``Application`` aimed at the upstream given by
    ``--upstream-host`` / ``--upstream-port`` and serves it on
    ``--host`` / ``--port``.
    """
    args = parser.parse_args()
    # Construct the application exactly once; a second construction would
    # only throw the first instance away.
    app = Application(
        upstream_host=args.upstream_host, upstream_port=args.upstream_port
    )
    app.serve(host=args.host, port=args.port)
if __name__ == "__main__":
main()

View File

@ -1,258 +1,197 @@
# Written by retoor@molodetz.nl
# This script is a network proxy that intercepts HTTP communication, replaces specific content in headers and data, and passes communication between clients and an upstream server. It supports chunked transfer encoding, keep-alive connections, and forking for client connections.
# Import summary: Uses socket, asyncio, pathlib, os, signal, and json modules from Python standard library.
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import socket
import asyncio
import time
import pathlib
import os
import signal
import json
from concurrent.futures import ThreadPoolExecutor as Executor
from app.app import get_timestamp
def set_header_value(headers, key, value):
    """Return *headers* with header *key* set to *value*.

    Args:
        headers: Raw header block as bytes (start line plus header lines,
            CRLF separated, optionally ending in the blank-line terminator).
        key: Header name to set; matched case-insensitively.
        value: New value; converted with ``str()``.

    Returns:
        The updated header block as bytes, always terminated by ``\\r\\n\\r\\n``.
        If the header is present only its own line is rewritten (keeping the
        name as originally spelled); otherwise a new line is appended.
    """
    value = str(value)
    text = headers.decode("utf-8")
    # Remove whole trailing CRLF pairs only; rstrip("\r\n\r\n") would strip
    # any run of CR/LF characters rather than the terminator sequence.
    while text.endswith("\r\n"):
        text = text[:-2]
    lines = text.split("\r\n") if text else []

    wanted = key.lower() + ":"
    for index, line in enumerate(lines):
        if line.lower().startswith(wanted):
            # Rewrite just this line so an identical value elsewhere in the
            # block (another header, the start line) is left untouched.
            name = line.split(":", 1)[0]
            lines[index] = f"{name}: {value}"
            break
    else:
        lines.append(f"{key}: {value}")

    return ("\r\n".join(lines) + "\r\n\r\n").encode()
ZAMENYAT_BACKLOG = 100
ZAMENYAT_THREAD_COUNT = 2500
ZAMENYAT_BUFFER_SIZE = 4096
ZAMENYAT_HEADER_MAX_LENGTH = 4096 * 2
def get_header_value(headers, key):
    """Look up the value of header *key* in a raw header block.

    Args:
        headers: Raw header block as bytes; ``None`` or empty yields ``None``.
        key: Header name; compared case-insensitively against the full name
            of each header line, so ``Connection`` does not match
            ``Proxy-Connection``.

    Returns:
        The value of the first matching header with surrounding whitespace
        removed, or ``None`` when the header is absent.

    Raises:
        UnicodeDecodeError: If *headers* is not valid UTF-8.
    """
    if not headers:
        return None
    wanted = key.lower()
    for line in headers.decode("utf-8").split("\r\n"):
        name, separator, value = line.partition(":")
        # Lines without a colon (e.g. a status line) are not header fields.
        if separator and name.strip().lower() == wanted:
            return value.strip()
    return None
def get_content_length(headers):
    """Return the ``Content-Length`` announced in *headers*, or 0.

    Args:
        headers: Raw header block as bytes (may be ``None``).

    Returns:
        The header value as a non-negative int. A missing, non-numeric,
        undecodable or negative value yields 0.
    """
    try:
        length = int(get_header_value(headers, "Content-Length"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: headers is not bytes-like; TypeError: header absent
        # (None); ValueError: non-numeric text or undecodable bytes.
        return 0
    # A negative length is invalid and would never be reached by a byte
    # counter, so treat it like "no body".
    return max(length, 0)
class AsyncWriter:
def __init__(self, writer, buffer_size=ZAMENYAT_BUFFER_SIZE, debug=False):
self.debug = debug
self.writer = writer
self.buffer_size = buffer_size
self.drain = self.writer.drain
self.close = self.writer.close
self.wait_closed = self.writer.wait_closed
async def write(self, data):
def send_all(sock, data):
while data:
chunk_size = self.buffer_size if len(data) > self.buffer_size else len(data)
chunk = data[:chunk_size]
self.writer.write(chunk)
if self.debug:
print("Write chunk:", chunk)
data = data[chunk_size:]
await self.writer.drain()
sent = sock.send(data)
data = data[sent:]
class HTTPDocument:
def __init__(self, headers, data):
self.headers = headers
self.data = data
self.original_content_length = get_content_length(headers)
self.content_length = self.original_content_length
class AsyncReader:
def replace(self, old, new):
print("Replaced")
self.data = self.data.replace(old.encode(), new.encode())
if self.original_content_length != len(self.data):
self.headers = set_header_value(self.headers, "Content-Length", len(self.data))
return True
return False
def __init__(self, reader, debug=False):
self.reader = reader
self.buffer = b""
self.debug = debug
@property
def content(self):
return self.data
async def read_until(self, to_match):
buffer = b""
while to_match not in buffer:
chunk = await self.read()
if not chunk:
return None
buffer += chunk
match_start = buffer.find(to_match)
data = buffer[: match_start + len(to_match)]
await self.unread(buffer[match_start + len(to_match) :])
return data
class Protocol:
def __init__(self, document, downstream, upstream):
self.upstream = upstream
self.downstream = downstream
self.document = document
self.bytes_sent = 0
async def read(self, buffer_size=ZAMENYAT_BUFFER_SIZE, exact=False):
buffer_size - len(self.buffer)
while len(self.buffer) < buffer_size:
chunk_size = buffer_size - len(self.buffer)
chunk = await self.reader.read(chunk_size)
if not chunk:
return None
def stream(self):
while self.bytes_sent != self.document.content_length:
chunk = self.downstream.recv(1)
self.bytes_sent += 1
self.upstream.sendall(chunk)
if self.debug:
print("Read chunk:", chunk)
self.buffer += chunk
if not exact:
break
buffer_size = (
len(self.buffer) if len(self.buffer) < buffer_size else buffer_size
)
data = self.buffer[:buffer_size]
self.buffer = self.buffer[buffer_size:]
return data
async def unread(self, data):
if not data:
return
if hasattr(data, "encode"):
data = data.encode()
self.buffer = data + self.buffer
class Socket:
    """Pair an ``AsyncReader`` and an ``AsyncWriter`` behind one object.

    The reader's ``read`` / ``read_until`` / ``unread`` and the writer's
    ``write`` / ``drain`` / ``close`` / ``wait_closed`` are re-exported as
    attributes of the instance.
    """

    def __init__(self, reader, writer, buffer_size, debug=False):
        """Wrap *reader* and *writer*.

        Args:
            reader: Object handed to ``AsyncReader``.
            writer: Object handed to ``AsyncWriter``.
            buffer_size: Write chunk size, forwarded to ``AsyncWriter``.
            debug: Forwarded to both wrappers.
        """
        self.debug = debug
        self.reader = AsyncReader(reader, debug=self.debug)
        # Forward buffer_size so the argument actually takes effect instead
        # of the writer silently falling back to its own default.
        self.writer = AsyncWriter(
            writer, buffer_size=buffer_size, debug=self.debug
        )
        self.read = self.reader.read
        self.read_until = self.reader.read_until
        self.unread = self.reader.unread
        self.write = self.writer.write
        self.drain = self.writer.drain
        self.close = self.writer.close
        self.wait_closed = self.writer.wait_closed
class Application:
def __init__(self, upstream_host, upstream_port, silent=False, *args, **kwargs):
    """Record the upstream address and reset all runtime state.

    Args:
        upstream_host: Host of the server that traffic is relayed to.
        upstream_port: Port of that server.
        silent: When true, ``handle_client`` prints no connection messages.
        *args, **kwargs: Passed on to the parent initializer.
    """
    self.upstream_host, self.upstream_port = upstream_host, upstream_port
    self.silent = silent

    # Populated later by serve_async() / upgrade_executor().
    self.server = None
    self.host = None
    self.port = None
    self.executor = None

    # Size limits taken from the module-level defaults.
    self.buffer_size = ZAMENYAT_BUFFER_SIZE
    self.header_max_length = ZAMENYAT_HEADER_MAX_LENGTH

    # Currently open connections and connections seen so far.
    self.connection_count = 0
    self.total_connection_count = 0

    super().__init__(*args, **kwargs)
async def get_headers(self, reader):
    """Read one header block from *reader* and parse it.

    Args:
        reader: Object with an awaitable ``read_until(delimiter)``.

    Returns:
        ``(start_line, fields)`` where ``start_line`` is the decoded first
        line and ``fields`` maps header names to values (all-digit values
        become ``int``), or ``(None, None)`` when nothing was read.
    """
    raw = await reader.read_until(b"\r\n\r\n")
    if not raw:
        return None, None

    # Dropping one of the two trailing CRLFs leaves a single empty element
    # at the end of the split, which the slice below discards.
    lines = raw[:-2].split(b"\r\n")
    start_line = lines[0]

    fields = {}
    for line in lines[1:-1]:
        pieces = line.split(b": ")
        name = pieces[0].decode()
        # Re-join so values that themselves contain ": " survive intact.
        text = ": ".join(piece.decode() for piece in pieces[1:])
        fields[name] = int(text) if text.isdigit() else text
    return start_line.decode(), fields
def header_dict_to_bytes(self, req_resp, headers):
    """Serialize a start line and header mapping to a raw header block.

    Args:
        req_resp: The request or status line as ``str``.
        headers: Mapping of header name to value.

    Returns:
        The encoded block: start line, one ``name: value`` line per entry,
        and the terminating blank line, all CRLF separated.
    """
    rendered = [f"{name}: {content}" for name, content in headers.items()]
    # The trailing "\r\n" element plus the join separator yields the
    # blank-line terminator.
    lines = [req_resp, *rendered, "\r\n"]
    return "\r\n".join(lines).encode()
async def stream(self, reader, writer, is_websocket=False):
global headers
try:
reader = Socket(reader, writer, ZAMENYAT_BUFFER_SIZE)
writer = Socket(reader, writer, ZAMENYAT_BUFFER_SIZE)
while True:
req_resp, headers = None, None
def read_until(sock, delim):
data = b""
if not is_websocket:
req_resp, headers = await self.get_headers(reader)
if not headers:
return None
if headers:
if "Content-Length" in headers:
while len(data) != headers["Content-Length"]:
chunk_size = (
headers["Content-Length"] - len(data)
if self.buffer_size
> headers["Content-Length"] - len(data)
else self.buffer_size
)
chunk = await reader.read(chunk_size)
if not chunk:
data = None
return None
data += chunk
await writer.write(self.header_dict_to_bytes(req_resp, headers))
await writer.drain()
if data:
await writer.write(data)
else:
data = await reader.read()
await writer.write(data)
if not is_websocket:
break
except asyncio.CancelledError:
pass
finally:
pass
return headers
# writer.close()
# await writer.wait_closed()
async def handle_client(self, reader, writer):
    """Relay one client connection to the upstream server.

    Opens a connection to ``upstream_host:upstream_port`` and then loops:
    a plain round forwards one message client -> upstream and one message
    upstream -> client via ``self.stream``; once a request carried
    ``Upgrade: websocket`` the next round relays both directions
    concurrently and then ends the loop. The loop also ends when the
    request did not ask for ``Connection: keep-alive``.

    Args:
        reader: Stream reader for the client side.
        writer: Stream writer for the client side.
    """
    self.connection_count += 1
    self.total_connection_count += 1
    connection_nr = self.total_connection_count
    upstream_writer = None
    try:
        upstream_reader, upstream_writer = await asyncio.open_connection(
            self.upstream_host, self.upstream_port
        )
        is_websocket = False
        while True:
            time_start = time.time()
            if not self.silent:
                print(
                    f"Connected to upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Time: {get_timestamp()}"
                )
            # Remember which mode this round runs in; is_websocket may flip
            # to True while handling a plain round.
            websocket_round = is_websocket
            keep_alive = False
            if websocket_round:
                await asyncio.gather(
                    self.stream(reader, upstream_writer, is_websocket),
                    self.stream(upstream_reader, writer, is_websocket),
                )
            else:
                request_headers = await self.stream(
                    reader, upstream_writer, is_websocket
                )
                await self.stream(upstream_reader, writer, is_websocket)
                if request_headers:
                    if request_headers.get("Connection") == "keep-alive":
                        keep_alive = True
                    if request_headers.get("Upgrade") == "websocket":
                        is_websocket = True
            # Measure every round, so the websocket round reports its own
            # duration rather than the value left over from the round before.
            time_duration = time.time() - time_start
            if not self.silent:
                print(
                    f"Disconnected upstream #{connection_nr} server {self.upstream_host}:{self.upstream_port} #{self.connection_count} Duration: {time_duration:.5f}s"
                )
            if websocket_round or not (keep_alive or is_websocket):
                break
    finally:
        # Always release the slot and both sockets, including when the
        # upstream connect or a relay step raised.
        self.connection_count -= 1
        writer.close()
        if upstream_writer is not None:
            upstream_writer.close()
        await writer.wait_closed()
        if upstream_writer is not None:
            await upstream_writer.wait_closed()
def upgrade_executor(self, thread_count):
    """Install a thread pool of *thread_count* workers as the loop default.

    Must be called while an event loop is running. The pool is stored on
    ``self.executor`` and returned.
    """
    pool = Executor(max_workers=thread_count)
    self.executor = pool
    asyncio.get_running_loop().set_default_executor(pool)
    return pool
async def serve_async(self, host, port, backlog=ZAMENYAT_BACKLOG):
    """Listen on *host*:*port* and serve clients until cancelled.

    Installs the thread pool first, records the listening address on the
    instance, then starts an asyncio server that dispatches every
    connection to ``handle_client``.
    """
    self.upgrade_executor(ZAMENYAT_THREAD_COUNT)
    self.host, self.port = host, port
    listener = await asyncio.start_server(
        self.handle_client, self.host, self.port, backlog=backlog
    )
    self.server = listener
    async with listener:
        await listener.serve_forever()
def serve(self, host, port, backlog=ZAMENYAT_BACKLOG):
    """Blocking entry point: run ``serve_async`` until interrupted.

    A ``KeyboardInterrupt`` ends the server and prints a short notice
    instead of propagating.
    """
    try:
        server_coroutine = self.serve_async(host, port, backlog=backlog)
        asyncio.run(server_coroutine)
    except KeyboardInterrupt:
        print("Shutted down server")
while True:
d = sock.recv(1)
data += d
if data.endswith(delim):
return data
except Exception as ex:
print(ex)
return None
def _recv_exact(sock, size):
    """Read up to *size* bytes from *sock*, stopping early only on EOF."""
    parts = []
    remaining = size
    while remaining > 0:
        part = sock.recv(remaining)
        if not part:
            # Peer closed the connection; without this check the loop would
            # spin forever on empty reads.
            break
        parts.append(part)
        remaining -= len(part)
    return b"".join(parts)


def _relay_chunked_body(upstream, sock, replacements):
    """Forward a chunked body from *upstream* to *sock*, rewriting each chunk."""
    while True:
        size_line = read_until(upstream, b"\r\n")
        if not size_line:
            return
        # Ignore optional chunk extensions after ';' when parsing the size.
        size = int(size_line[:-2].split(b";", 1)[0].strip(), 16)
        if not size:
            # The terminating chunk is the size line "0" plus an empty line.
            sock.sendall(b"0\r\n\r\n")
            return
        data = _recv_exact(upstream, size)
        if len(data) < size:
            # Upstream closed in the middle of a chunk; nothing valid to send.
            return
        for old, new in replacements.items():
            data = data.replace(old.encode(), new.encode())
        # Replacements may change the length, so emit a fresh size line and
        # then pass along the CRLF that closes the chunk.
        data = hex(len(data))[2:].encode() + b"\r\n" + data
        data += _recv_exact(upstream, 2)
        sock.sendall(data)
        print(data)


def communicate(sock, config, upstream_host="127.0.0.1", upstream_port=8082):
    """Proxy HTTP exchanges between client *sock* and the upstream server.

    Each round forwards one request (headers, plus a body when
    ``Content-Length`` is set) upstream, then forwards the response back.
    Response headers have every ``config["upstream"]`` key replaced by its
    value; chunked response bodies get the ``config["downstream"]``
    replacements. The loop repeats only while the response has a
    ``Content-Length`` body and ``Connection: keep-alive``.

    Args:
        sock: Connected client socket.
        config: Dict with optional ``"upstream"`` and ``"downstream"``
            mappings of text to replacement text.
        upstream_host: Upstream address to connect to.
        upstream_port: Upstream port to connect to.

    Raises:
        ValueError: If a chunk-size line is not valid hexadecimal.
        OSError: On socket failures.

    Both sockets are closed when the function returns or raises.
    """
    upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        upstream.connect((upstream_host, upstream_port))
        while True:
            # --- request: client -> upstream ---
            headers = read_until(sock, b"\r\n\r\n")
            print(headers)
            if not headers:
                break
            upstream.sendall(headers)
            content_length = get_content_length(headers)
            if content_length:
                doc = HTTPDocument(headers, b'')
                protocol = Protocol(doc, sock, upstream)
                protocol.stream()

            # --- response: upstream -> client ---
            headers = read_until(upstream, b"\r\n\r\n")
            if not headers:
                # No response headers could be read; there is nothing to
                # rewrite or forward.
                break
            for key, value in config.get("upstream", {}).items():
                headers = headers.replace(key.encode(), value.encode())
            print(headers)
            sock.sendall(headers)
            content_length = get_content_length(headers)
            handled = content_length > 0
            if content_length:
                doc = HTTPDocument(headers, b'')
                protocol = Protocol(doc, upstream, sock)
                protocol.stream()

            if get_header_value(headers, "Connection") == "close":
                return
            if get_header_value(headers, "Upgrade") == "websocket":
                return
            if get_header_value(headers, "Transfer-Encoding") == "chunked":
                _relay_chunked_body(
                    upstream, sock, config.get("downstream", {})
                )
                break
            if not handled:
                break
            if get_header_value(headers, "Connection") == "keep-alive":
                continue
            break
    finally:
        # Single cleanup point: every exit path releases both sockets.
        upstream.close()
        sock.close()
def reap_zombie_processes():
    """Collect every child process that has already exited, without blocking.

    Polls ``os.waitpid`` with ``WNOHANG`` until it reports that no further
    exited child is available (pid 0) or that there are no children at all.
    """
    reaping = True
    while reaping:
        try:
            child_pid, _status = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            # No child processes exist.
            return
        reaping = child_pid != 0
async def serve(host, port, config):
    """Accept clients on *host*:*port* and handle each in a forked child.

    Args:
        host: Address to bind the listening socket to.
        port: Port to bind the listening socket to.
        config: Path to a JSON file with the replacement configuration;
            the parsed content is handed to ``communicate``.

    The accept loop is blocking and never returns normally. Exited children
    are reaped from a ``SIGCHLD`` handler.
    """
    config = json.loads(pathlib.Path(config).read_text())
    signal.signal(signal.SIGCHLD, lambda signum, frame: reap_zombie_processes())
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        server.bind((host, port))
        server.listen()
        while True:
            client, address = server.accept()
            print(f"Connection from {address}")
            pid = os.fork()
            if pid == 0:
                # Child: must never fall back into the accept loop, so it
                # always terminates here, even when handling fails.
                exit_code = 0
                try:
                    server.close()
                    communicate(client, config)
                except Exception as ex:
                    print(ex)
                    exit_code = 1
                finally:
                    os._exit(exit_code)
            # Parent: the child owns the connection now. Nothing is kept
            # per client, so memory does not grow with the connection count.
            client.close()
    finally:
        server.close()
asyncio.run(serve("0.0.0.0", 3046, ".replace.json"))