#!/usr/bin/env python3
# retoor <retoor@molodetz.nl>
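"""Build the static Wren-CLI manual.

Renders the Jinja2 page templates under manual_src/ into bin/manual,
generates per-page SEO metadata and a client-side search index, copies
static and WASM assets, and validates internal links.
"""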
import json
import re
import shutil
import yaml
from pathlib import Path
from html.parser import HTMLParser
from jinja2 import Environment, FileSystemLoader, ChoiceLoader
class SEOGenerator:
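    """Derives SEO metadata (keywords, meta description, title) from rendered page content."""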
STOP_WORDS = {
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'been',
'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need',
'this', 'that', 'these', 'those', 'it', 'its', 'they', 'them',
'we', 'us', 'you', 'your', 'he', 'she', 'him', 'her', 'i', 'my',
'if', 'then', 'else', 'when', 'where', 'why', 'how', 'what', 'which',
'who', 'whom', 'not', 'no', 'yes', 'all', 'any', 'both', 'each',
'more', 'most', 'other', 'some', 'such', 'only', 'same', 'so',
'than', 'too', 'very', 'just', 'also', 'now', 'here', 'there'
}
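    # Domain-specific terms that receive an extra weight boost during keyword extraction.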
WREN_TERMS = {
'wren', 'fiber', 'class', 'method', 'module', 'import', 'foreign',
'static', 'construct', 'scheduler', 'async', 'await', 'cli', 'api',
'json', 'http', 'websocket', 'sqlite', 'crypto', 'tls', 'regex'
}
def extract_keywords(self, text, title, max_keywords=10):
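        """Return up to max_keywords terms ranked by frequency.

        Words that also appear in the title are weighted 3x, and known
        Wren/CLI terms 2x, before ranking.
        """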
words = re.findall(r'\b[a-zA-Z][a-zA-Z0-9_]{2,}\b', text.lower())
freq = {}
for word in words:
if word not in self.STOP_WORDS and len(word) > 2:
freq[word] = freq.get(word, 0) + 1
title_words = set(re.findall(r'\b[a-zA-Z][a-zA-Z0-9_]+\b', title.lower()))
for word in freq:
if word in title_words:
freq[word] *= 3
if word in self.WREN_TERMS:
freq[word] *= 2
sorted_words = sorted(freq.items(), key=lambda x: x[1], reverse=True)
return [word for word, _ in sorted_words[:max_keywords]]
def generate_description(self, text, title, max_length=155):
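        """Build a meta description of at most max_length characters.

        Strips navigation artifacts from the extracted text, then joins the
        first prose-like sentences that fit; falls back to a generic
        description when nothing usable remains.
        """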
text = re.sub(r'\s+', ' ', text).strip()
text = re.sub(r'^Skip to main content\s*', '', text)
text = re.sub(r'^Menu\s+', '', text)
text = re.sub(r'Wren-CLI\s+v[\d.]+\s*', '', text)
text = re.sub(r'Previous:.*?Next:.*?$', '', text)
text = text.strip()
sentences = re.split(r'(?<=[.!?])\s+', text)
sentences = [s for s in sentences if len(s) > 20 and not s.startswith(('import ', 'var ', '//'))]
if not sentences:
return f"{title} - Wren-CLI documentation and reference."
description = sentences[0]
if len(description) > max_length:
description = description[:max_length-3].rsplit(' ', 1)[0] + '...'
elif len(description) < 80 and len(sentences) > 1:
for s in sentences[1:]:
if len(description) + len(s) + 1 <= max_length:
description += ' ' + s
else:
break
return description
def extract_title(self, html):
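        """Prefer the first <h1>; fall back to the <title> (minus any " - " suffix) or a default."""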
match = re.search(r'<h1[^>]*>([^<]+)</h1>', html)
if match:
return match.group(1).strip()
match = re.search(r'<title[^>]*>([^<]+)</title>', html)
if match:
title = match.group(1).strip()
if ' - ' in title:
return title.split(' - ')[0]
return title
return 'Wren-CLI Documentation'
class TextExtractor(HTMLParser):
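    """Collects visible text, skipping script, style, nav, head, header, footer and aside content."""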
def __init__(self):
super().__init__()
self.text = []
self.skip_tags = {'script', 'style', 'nav', 'head', 'header', 'footer', 'aside'}
self.skip_depth = 0
def handle_starttag(self, tag, attrs):
if tag in self.skip_tags:
self.skip_depth += 1
def handle_endtag(self, tag):
if tag in self.skip_tags and self.skip_depth > 0:
self.skip_depth -= 1
def handle_data(self, data):
if self.skip_depth == 0:
text = data.strip()
if text:
self.text.append(text)
def get_text(self):
return ' '.join(self.text)
class LinkExtractor(HTMLParser):
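    """Collects href/src URLs from <a>, <img>, <script> and <link> tags."""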
def __init__(self):
super().__init__()
self.links = []
def handle_starttag(self, tag, attrs):
attrs_dict = dict(attrs)
if tag == 'a' and 'href' in attrs_dict:
self.links.append(attrs_dict['href'])
elif tag in ('img', 'script') and 'src' in attrs_dict:
self.links.append(attrs_dict['src'])
elif tag == 'link' and 'href' in attrs_dict:
self.links.append(attrs_dict['href'])
def extract_text(html):
parser = TextExtractor()
parser.feed(html)
return parser.get_text()
def escape_json_for_html(json_string):
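    """Escape '</' and '<!--' so the JSON can be embedded safely in an inline <script> tag."""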
return (json_string
.replace('</', '<\\/')
.replace('<!--', '<\\!--'))
class TemplateFormatter:
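    """Normalizes page templates in place before building.

    Ensures the author comment header is present, indents markup inside the
    article block, and prefixes bare navigation URLs with their section
    directory.
    """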
INDENT = ' '
def __init__(self):
self.fixes = []
def format_file(self, path, section=''):
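        """Apply all fixes to one template file; rewrite it and return True only if it changed."""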
content = path.read_text()
original = content
content = self._ensure_author_comment(content)
content = self._fix_article_indentation(content)
content = self._fix_navigation_urls(content, section)
if content != original:
path.write_text(content)
self.fixes.append(str(path))
return True
return False
def _ensure_author_comment(self, content):
if not content.startswith('{# retoor'):
return '{# retoor <retoor@molodetz.nl> #}\n' + content
return content
def _fix_article_indentation(self, content):
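        """Indent top-level tags inside the {% block article %} block by one level."""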
lines = content.split('\n')
result = []
in_article = False
for line in lines:
if '{% block article %}' in line:
in_article = True
result.append(line)
continue
if '{% endblock %}' in line and in_article:
in_article = False
result.append(line)
continue
if in_article and line and not line.startswith(' '):
stripped = line.strip()
if stripped.startswith('<') and not stripped.startswith('<!'):
line = self.INDENT + stripped
result.append(line)
return '\n'.join(result)
def _fix_navigation_urls(self, content, section):
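        """Prefix bare prev_page/next_page URLs with the section directory.

        URLs that already contain a slash, and the "Home" link back to
        index.html, are left untouched.
        """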
if not section or section == '.':
return content
def fix_url(match):
prefix = match.group(1)
url = match.group(2)
if '/' in url:
return match.group(0)
return f'{prefix}{section}/{url}'
def fix_prev_line(match):
line = match.group(0)
if '"url": "index.html"' in line and '"title": "Home"' in line:
return line
            return re.sub(
                r'("url":\s*")([a-z0-9_-]+\.html")',
                lambda m: m.group(0) if '/' in m.group(2) else m.group(1) + section + '/' + m.group(2),
                line
            )
content = re.sub(
r'{% set prev_page = \{[^}]+\} %}',
fix_prev_line,
content
)
content = re.sub(
r'(next_page\s*=\s*\{"url":\s*")([^"]+\.html")',
fix_url,
content
)
return content
def report(self):
if self.fixes:
print(f" Auto-formatted {len(self.fixes)} file(s):")
for f in self.fixes:
print(f" {f}")
class ManualBuilder:
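    """Orchestrates the build: format templates, render pages with SEO data,
    build the search index, copy assets, and validate links.
    """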
def __init__(self):
self.root = Path(__file__).parent.parent
self.src = self.root / 'manual_src'
self.output = self.root / 'bin' / 'manual'
self.site = self.load_yaml('data/site.yaml')
self.nav = self.load_yaml('data/navigation.yaml')
self.seo = SEOGenerator()
templates_loader = FileSystemLoader(str(self.src / 'templates'))
pages_loader = FileSystemLoader(str(self.src))
self.env = Environment(
loader=ChoiceLoader([templates_loader, pages_loader]),
trim_blocks=True,
lstrip_blocks=True
)
self.env.globals.update({
'site': self.site,
'nav': self.nav
})
def load_yaml(self, path):
with open(self.src / path) as f:
return yaml.safe_load(f)
def format_templates(self):
formatter = TemplateFormatter()
pages_dir = self.src / 'pages'
for html_file in pages_dir.rglob('*.html'):
rel_path = html_file.relative_to(pages_dir)
section = str(rel_path.parent) if rel_path.parent != Path('.') else ''
formatter.format_file(html_file, section)
formatter.report()
def build(self):
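        """Run the full build.

        Pages are rendered twice: the first pass produces the text indexed
        for search, and the second pass re-renders every page with the
        serialized search index available as a template global.
        """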
print("Formatting templates...")
self.format_templates()
if self.output.exists():
shutil.rmtree(self.output)
self.output.mkdir(parents=True)
self.build_pages()
search_index = self.build_search_index()
self.env.globals['search_index_json'] = escape_json_for_html(json.dumps(search_index))
self.rebuild_pages_with_index()
self.copy_static()
self.validate_links()
print(f"Built manual to {self.output}")
def build_pages(self):
pages_dir = self.src / 'pages'
for html_file in pages_dir.rglob('*.html'):
rel_path = html_file.relative_to(pages_dir)
self.build_page(html_file, rel_path)
    def rebuild_pages_with_index(self):
        # Same as build_pages; run as a second pass once the search index JSON
        # is available as a template global.
        self.build_pages()
def build_page(self, src_path, rel_path):
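        """Render one page to the output tree.

        The template is rendered twice: first with empty SEO data to extract
        the page text and title, then again with the generated keywords,
        description and Open Graph fields.
        """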
template_path = f'pages/{rel_path}'
template = self.env.get_template(template_path)
depth = len(rel_path.parts) - 1
static_prefix = '../' * depth if depth > 0 else './'
html = template.render(
current_path=str(rel_path),
static_prefix=static_prefix,
depth=depth,
seo={}
)
text = extract_text(html)
title = self.seo.extract_title(html)
seo = {
'keywords': self.seo.extract_keywords(text, title),
'description': self.seo.generate_description(text, title),
'og_title': f"{title} - Wren-CLI",
'og_type': 'article' if 'api/' in str(rel_path) or 'tutorials/' in str(rel_path) else 'website',
}
html = template.render(
current_path=str(rel_path),
static_prefix=static_prefix,
depth=depth,
seo=seo
)
out_path = self.output / rel_path
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(html)
print(f" {rel_path}")
def copy_static(self):
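        """Copy static assets and, when present, the compiled WASM build into the output tree."""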
static_src = self.src / 'static'
for item in static_src.rglob('*'):
if item.is_file():
rel = item.relative_to(static_src)
dest = self.output / rel
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(item, dest)
wasm_src = self.root / 'bin' / 'wasm'
wasm_dest = self.output / 'wasm'
if wasm_src.exists():
wasm_dest.mkdir(parents=True, exist_ok=True)
for item in wasm_src.iterdir():
if item.is_file():
shutil.copy2(item, wasm_dest / item.name)
print(f" Copied WASM files to {wasm_dest}")
def build_search_index(self):
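        """Write search-index.json from the navigation data and the rendered pages.

        Each entry records the page URL, title, section, description, method
        list and the first 500 words of extracted text.
        """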
index = {'pages': []}
for section in self.nav['sections']:
section_title = section['title']
section_dir = section['directory']
for page in section.get('pages', []):
if section_dir == ".":
url = f"{page['file']}.html"
else:
url = f"{section_dir}/{page['file']}.html"
rendered_path = self.output / url
content = ''
if rendered_path.exists():
html = rendered_path.read_text()
content = extract_text(html)
content = ' '.join(content.split()[:500])
index['pages'].append({
'url': url,
'title': page['title'],
'section': section_title,
'description': page.get('description', ''),
'methods': page.get('methods', []),
'content': content
})
(self.output / 'search-index.json').write_text(
json.dumps(index, indent=2)
)
return index
def _is_local_link(self, link):
if not link:
return False
if link.startswith(('http://', 'https://', 'mailto:', 'javascript:', '#', 'data:')):
return False
return True
def _resolve_link(self, base_dir, link):
link = link.split('#')[0].split('?')[0]
if not link:
return base_dir / 'index.html'
return (base_dir / link).resolve()
def validate_links(self):
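        """Check every internal href/src in the built output and abort with exit code 1 on broken links."""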
broken = []
for html_file in self.output.rglob('*.html'):
rel_path = html_file.relative_to(self.output)
html = html_file.read_text()
extractor = LinkExtractor()
extractor.feed(html)
for link in extractor.links:
if self._is_local_link(link):
target = self._resolve_link(html_file.parent, link)
if not target.exists():
broken.append((rel_path, link))
if broken:
print("Broken links found:")
for page, link in broken:
print(f" {page}: {link}")
raise SystemExit(1)
def main():
builder = ManualBuilder()
builder.build()
if __name__ == '__main__':
main()