// retoor <retoor@molodetz.nl>
import "io" for File, Directory
import "os" for Process
import "pathlib" for Path
import "yaml" for Yaml
import "json" for Json
import "jinja" for Environment, FileSystemLoader, ChoiceLoader
import "regex" for Regex
import "strutil" for Str
// Reduces rendered HTML to plain text for keyword, description and search
// index generation.
class TextExtractor {
  // Elements that are removed together with everything they contain.
  static SKIP_TAGS { ["script", "style", "nav", "head", "header", "footer", "aside"] }

  // Returns a regex source that matches a "</" which does NOT go on to spell
  // `tag`. One alternative is produced per prefix position; for "nav" the
  // result is "</[^n]|</n[^a]|</na[^v]".
  //
  // Rejecting only on the first letter (the previous approach) meant that a
  // <header> containing </h1>, an <aside> containing </a>, or a <footer>
  // containing </form> could never be matched and so was never stripped.
  static notClosingTag_(tag) {
    var alternatives = []
    var prefix = ""
    for (ch in tag) {
      alternatives.add("</" + prefix + "[^" + ch + "]")
      prefix = prefix + ch
    }
    return alternatives.join("|")
  }

  // Returns the text of `html` with SKIP_TAGS elements removed, all other
  // tags replaced by a space, whitespace runs collapsed to a single space and
  // the result trimmed.
  static extract(html) {
    var text = html
    for (tag in TextExtractor.SKIP_TAGS) {
      // Element content: any non-"<" character, a "<" that does not open a
      // closing tag, or a closing tag other than the one being skipped.
      var inner = "([^<]|<[^/]|" + TextExtractor.notClosingTag_(tag) + ")*"
      var pattern = Regex.new("<" + tag + "[^>]*>" + inner + "</" + tag + ">", "gi")
      text = pattern.replaceAll(text, "")
    }

    // Remaining tags become a space so adjacent words do not fuse together.
    var tagPattern = Regex.new("<[^>]+>", "g")
    text = tagPattern.replaceAll(text, " ")

    var whitespace = Regex.new("[ \t\n\r\f]+", "g")
    text = whitespace.replaceAll(text, " ")
    return text.trim()
  }
}
// Derives SEO metadata (keywords, description, title) from a rendered page.
class SEOGenerator {
  // Common English words that are never reported as keywords.
  static STOP_WORDS {
    return [
      "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
      "of", "with", "by", "from", "as", "is", "was", "are", "were", "been",
      "be", "have", "has", "had", "do", "does", "did", "will", "would",
      "could", "should", "may", "might", "must", "shall", "can", "need",
      "this", "that", "these", "those", "it", "its", "they", "them",
      "we", "us", "you", "your", "he", "she", "him", "her", "i", "my",
      "if", "then", "else", "when", "where", "why", "how", "what", "which",
      "who", "whom", "not", "no", "yes", "all", "any", "both", "each",
      "more", "most", "other", "some", "such", "only", "same", "so",
      "than", "too", "very", "just", "also", "now", "here", "there"
    ]
  }

  // Domain terms whose keyword score is doubled.
  static WREN_TERMS {
    return [
      "wren", "fiber", "class", "method", "module", "import", "foreign",
      "static", "construct", "scheduler", "async", "await", "cli", "api",
      "json", "http", "websocket", "sqlite", "crypto", "tls", "regex"
    ]
  }

  // Returns a list of at most `maxKeywords` words taken from `text`, ordered
  // by descending score.
  //
  // The score of a word is its occurrence count in the lower-cased `text`,
  // multiplied by 3 when the word also occurs in `title` and by 2 when it is
  // one of WREN_TERMS (both multipliers can apply). Stop words and words
  // shorter than three characters are ignored.
  static extractKeywords(text, title, maxKeywords) {
    var wordPattern = Regex.new("\\b[a-zA-Z][a-zA-Z0-9_]{2,}\\b", "g")
    var stopWords = SEOGenerator.STOP_WORDS

    var freq = {}
    for (match in wordPattern.matchAll(Str.toLower(text))) {
      var word = match.text
      if (word.count > 2 && !stopWords.contains(word)) {
        freq[word] = freq.containsKey(word) ? freq[word] + 1 : 1
      }
    }

    var titleWords = []
    for (m in wordPattern.matchAll(Str.toLower(title))) titleWords.add(m.text)

    var wrenTerms = SEOGenerator.WREN_TERMS
    for (word in freq.keys) {
      if (titleWords.contains(word)) freq[word] = freq[word] * 3
      if (wrenTerms.contains(word)) freq[word] = freq[word] * 2
    }

    var sorted = freq.keys.toList
    // List.sort takes a boolean "a goes before b" predicate. A numeric
    // difference is always truthy in Wren (only false and null are falsy),
    // so the comparison must be written as an explicit inequality.
    sorted.sort {|a, b| freq[a] > freq[b] }

    var result = []
    for (word in sorted) {
      if (result.count >= maxKeywords) break
      result.add(word)
    }
    return result
  }

  // Returns a short description built from the leading sentence(s) of `text`.
  //
  // Known page chrome ("Skip to main content", "Menu", the "Wren-CLI vX.Y"
  // banner and the trailing "Previous: ... Next: ..." navigation) is removed
  // first. Fragments of 20 bytes or fewer, and fragments that look like code
  // ("import ", "var ", "//"), are discarded. If nothing is left, a generic
  // sentence based on `title` is returned.
  //
  // A first sentence longer than `maxLength` bytes is cut at the last space
  // that fits and suffixed with "...". A first sentence shorter than 80 bytes
  // is extended with following sentences while the total stays within
  // `maxLength`.
  static generateDescription(text, title, maxLength) {
    var ws = Regex.new("\\s+", "g")
    text = ws.replaceAll(text, " ").trim()

    var skipPattern = Regex.new("^Skip to main content\\s*", "")
    text = skipPattern.replaceAll(text, "")
    var menuPattern = Regex.new("^Menu\\s+", "")
    text = menuPattern.replaceAll(text, "")
    var versionPattern = Regex.new("Wren-CLI\\s+v[\\d.]+\\s*", "g")
    text = versionPattern.replaceAll(text, "")
    var navPattern = Regex.new("Previous:.*?Next:.*$", "")
    text = navPattern.replaceAll(text, "")
    text = text.trim()

    // NOTE(review): the split separator includes the terminating punctuation,
    // so the resulting fragments presumably no longer end in ".", "!" or "?".
    var sentencePattern = Regex.new("[.!?][ \t\n\r\f]+", "g")
    var sentences = sentencePattern.split(text)

    var filtered = []
    for (s in sentences) {
      var sLen = s.bytes.count
      if (sLen > 20 && !s.startsWith("import ") && !s.startsWith("var ") && !s.startsWith("//")) {
        filtered.add(s)
      }
    }
    if (filtered.isEmpty) return "%(title) - Wren-CLI documentation and reference."

    var description = filtered[0]
    var descLen = description.bytes.count
    if (descLen > maxLength) {
      // Reserve three bytes for the ellipsis, then back up to a word boundary.
      description = description[0...(maxLength - 3)]
      descLen = description.bytes.count
      var lastSpace = descLen - 1
      while (lastSpace > 0 && description[lastSpace] != " ") lastSpace = lastSpace - 1
      if (lastSpace > 0) description = description[0...lastSpace]
      description = description + "..."
    } else if (descLen < 80 && filtered.count > 1) {
      // Append further sentences until the next one would exceed the budget.
      var i = 1
      var fCount = filtered.count
      while (i < fCount) {
        var addLen = filtered[i].bytes.count
        if (descLen + addLen + 1 <= maxLength) {
          description = description + " " + filtered[i]
          descLen = descLen + addLen + 1
        } else {
          break
        }
        i = i + 1
      }
    }
    return description
  }

  // Returns the page title: the text of the first <h1> if one matches,
  // otherwise the <title> text (only the part before the first " - "),
  // otherwise the fixed fallback "Wren-CLI Documentation".
  static extractTitle(html) {
    var h1Pattern = Regex.new("<h1[^>]*>([^<]+)</h1>", "i")
    var match = h1Pattern.match(html)
    if (match) return match.group(1).trim()

    var titlePattern = Regex.new("<title[^>]*>([^<]+)</title>", "i")
    match = titlePattern.match(html)
    if (match) {
      var title = match.group(1).trim()
      if (title.contains(" - ")) {
        var parts = title.split(" - ")
        return parts[0]
      }
      return title
    }
    return "Wren-CLI Documentation"
  }
}
// Normalises template source files in place and remembers which files it
// had to change.
class TemplateFormatter {
  // Prefix given to re-indented article lines.
  static INDENT { " " }

  construct new() {
    _fixes = []
  }

  // Paths (as strings) of every file rewritten so far.
  fixes { _fixes }

  // Runs all fix-ups on the file at `path`. The file is written back only
  // when the text changed. Returns true if it was rewritten, false otherwise.
  formatFile(path, section) {
    var before = path.readText()
    var after = ensureAuthorComment_(before)
    after = fixArticleIndentation_(after)
    after = fixNavigationUrls_(after, section)
    if (after == before) return false

    path.writeText(after)
    _fixes.add(path.toString)
    return true
  }

  // Prepends the author comment line unless the content already starts
  // with one.
  ensureAuthorComment_(content) {
    if (content.startsWith("{# retoor")) return content
    return "{# retoor <retoor@molodetz.nl> #}" + "\n" + content
  }

  // Re-indents lines inside the article block: a non-empty line that does
  // not start with a space and whose trimmed form starts with "<" (but not
  // "<!") is replaced by INDENT plus the trimmed line. The block markers
  // themselves and every other line are passed through unchanged.
  fixArticleIndentation_(content) {
    var output = []
    var inside = false
    for (line in content.split("\n")) {
      var emitted = line
      if (line.contains("{\x25 block article \x25}")) {
        inside = true
      } else if (inside && line.contains("{\x25 endblock \x25}")) {
        inside = false
      } else if (inside && !line.isEmpty && !line.startsWith(" ")) {
        var bare = line.trim()
        if (bare.startsWith("<") && !bare.startsWith("<!")) {
          emitted = TemplateFormatter.INDENT + bare
        }
      }
      output.add(emitted)
    }
    return output.join("\n")
  }

  // Placeholder: currently returns `content` untouched.
  fixNavigationUrls_(content, section) {
    return content
  }

  // Prints the list of rewritten files; prints nothing when there are none.
  report() {
    if (_fixes.isEmpty) return
    System.print(" Auto-formatted %(_fixes.count) file(s):")
    for (fixed in _fixes) System.print(" %(fixed)")
  }
}
// Renders the manual from `manual_src` (relative to the current working
// directory) into `bin/manual`, including a search index and static assets.
class ManualBuilder {
  // Resolves the source/output locations, loads the site and navigation YAML
  // data and prepares a template environment that looks templates up in
  // `manual_src/templates` first and in `manual_src` second.
  construct new() {
    _root = Path.new(Process.cwd)
    _src = _root / "manual_src"
    _output = _root / "bin" / "manual"
    _site = Yaml.parse((_src / "data/site.yaml").readText())
    _nav = Yaml.parse((_src / "data/navigation.yaml").readText())
    var templatesLoader = FileSystemLoader.new((_src / "templates").toString)
    var pagesLoader = FileSystemLoader.new(_src.toString)
    _env = Environment.new(ChoiceLoader.new([templatesLoader, pagesLoader]))
    // Filled in by build() once the first rendering pass has produced pages.
    _searchIndexJson = ""
  }

  // Full build: wipe the output directory, render every page, build the
  // search index from the rendered output, render every page again with the
  // index embedded, then copy the static files.
  build() {
    System.print("[DEBUG] Starting build...")
    System.print("[DEBUG] Checking output exists...")
    if (_output.exists()) {
      System.print("[DEBUG] Removing old output...")
      _output.rmtree()
    }
    System.print("[DEBUG] Creating output directory...")
    _output.mkdir(true)
    System.print("[DEBUG] Building pages...")
    buildPages()
    System.print("[DEBUG] Building search index...")
    var searchIndex = buildSearchIndex()
    System.print("[DEBUG] Converting search index to JSON...")
    _searchIndexJson = escapeJsonForHtml_(Json.stringify(searchIndex))
    System.print("[DEBUG] Rebuilding pages with index...")
    rebuildPagesWithIndex()
    System.print("[DEBUG] Copying static files...")
    copyStatic()
    System.print("Built manual to %(_output)")
  }

  // Template formatting is intentionally disabled; only logs that fact.
  formatTemplates() {
    System.print("[DEBUG] formatTemplates: skipped (slow in Wren)")
  }

  // Renders every "*.html" file found under `manual_src/pages`.
  buildPages() {
    var pagesDir = _src / "pages"
    for (htmlFile in pagesDir.rglob("*.html")) {
      var relPath = htmlFile.relativeTo(pagesDir)
      buildPage(htmlFile, relPath)
    }
  }

  // Second rendering pass. It performs exactly the same work as buildPages();
  // the only difference is that _searchIndexJson has been populated by then.
  rebuildPagesWithIndex() {
    buildPages()
  }

  // Renders one page and writes it to the output directory under `relPath`.
  //
  // The template is rendered twice: once with an empty "seo" map so that the
  // page text and title can be extracted from the result, and once more with
  // the SEO data derived from that first rendering.
  buildPage(srcPath, relPath) {
    System.print("[DEBUG] buildPage: %(relPath)")
    var relStr = relPath.toString
    var templatePath = "pages/" + relStr
    System.print("[DEBUG] Getting template: %(templatePath)")
    var template = _env.getTemplate(templatePath)
    System.print("[DEBUG] Template loaded")

    // One "../" per directory level below the pages root.
    var depth = relPath.parts.count - 1
    var staticPrefix = depth > 0 ? ("../" * depth) : "./"
    var context = {
      "current_path": relStr,
      "static_prefix": staticPrefix,
      "depth": depth,
      "seo": {},
      "site": _site,
      "nav": _nav,
      "search_index_json": _searchIndexJson
    }

    var html = template.render(context)
    var text = TextExtractor.extract(html)
    var title = SEOGenerator.extractTitle(html)
    var ogType = (relStr.contains("api/") || relStr.contains("tutorials/")) ? "article" : "website"
    context["seo"] = {
      "keywords": SEOGenerator.extractKeywords(text, title, 10),
      "description": SEOGenerator.generateDescription(text, title, 155),
      "og_title": "%(title) - Wren-CLI",
      "og_type": ogType
    }
    html = template.render(context)

    var outPath = _output / relPath
    outPath.parent.mkdir(true)
    outPath.writeText(html)
    System.print(" %(relPath)")
  }

  // Copies every file below `manual_src/static` to the same relative
  // location in the output directory.
  copyStatic() {
    var staticSrc = _src / "static"
    for (entry in staticSrc.walk()) {
      // NOTE(review): entries are indexed as [root, ?, files]; element 1 is
      // not used here (presumably the sub-directory names) - confirm against
      // the pathlib module.
      var root = entry[0]
      var files = entry[2]
      for (f in files) {
        var item = root / f
        var rel = item.relativeTo(staticSrc)
        var dest = _output / rel
        dest.parent.mkdir(true)
        item.copyfile(dest)
      }
    }
  }

  // Builds the search index from the navigation data, writes it to
  // "search-index.json" in the output directory and returns it.
  //
  // Each page listed in the navigation gets one entry. Its "content" is the
  // extracted text of the already rendered page, limited to the first 500
  // space-separated words, or "" when the rendered file does not exist.
  buildSearchIndex() {
    var index = {"pages": []}
    for (section in _nav["sections"]) {
      var sectionTitle = section["title"]
      var sectionDir = section["directory"]
      var pages = section["pages"]
      if (pages == null) pages = []
      for (page in pages) {
        var url = "%(sectionDir)/%(page["file"]).html"
        var renderedPath = _output / url
        var content = ""
        if (renderedPath.exists()) {
          var html = renderedPath.readText()
          content = TextExtractor.extract(html)
          var words = content.split(" ")
          if (words.count > 500) {
            content = words[0...500].join(" ")
          }
        }
        var description = page["description"]
        if (description == null) description = ""
        var methods = page["methods"]
        if (methods == null) methods = []
        index["pages"].add({
          "url": url,
          "title": page["title"],
          "section": sectionTitle,
          "description": description,
          "methods": methods,
          "content": content
        })
      }
    }
    (_output / "search-index.json").writeText(Json.stringify(index, 2))
    return index
  }

  // Makes a JSON document safe to embed inside an HTML <script> element by
  // neutralising the sequences "</" and "<!--".
  //
  // Both replacements are legal JSON string escapes ("\/" and "\u003c"), so
  // the result still parses with JSON.parse and also evaluates to the same
  // value when used as a JavaScript literal. The earlier "<\!--" form was
  // only valid as JavaScript; "\!" is not a JSON escape.
  escapeJsonForHtml_(jsonString) {
    return jsonString.replace("</", "<\\/").replace("<!--", "\\u003c!--")
  }
}
// Script entry point: construct the builder (which loads the site and
// navigation data) and run a full build as soon as this module is executed.
var builder = ManualBuilder.new()
builder.build()