// wren/build_manual.wren at main

 // retoor <retoor@molodetz.nl>
 import "io" for File, Directory
 import "os" for Process
 import "pathlib" for Path
 import "yaml" for Yaml
 import "json" for Json
 import "jinja" for Environment, FileSystemLoader, ChoiceLoader
 import "regex" for Regex
 import "strutil" for Str
 // Reduces an HTML document to a single line of plain text.
 class TextExtractor {
   // Names of the elements that are removed together with their content
   // before the remaining tags are stripped.
   static SKIP_TAGS { ["script", "style", "nav", "head", "header", "footer", "aside"] }

   // Returns `html` with the SKIP_TAGS elements removed, every remaining tag
   // replaced by a space, whitespace runs collapsed to one space, and the
   // result trimmed.
   static extract(html) {
     var stripped = html
     for (name in TextExtractor.SKIP_TAGS) {
       // The element body is matched as "anything that is not a closing tag
       // beginning with the same first letter as `name`".
       // NOTE(review): that also stops on unrelated closing tags sharing the
       // first letter (e.g. </a> inside <aside>, </h1> inside <header>), in
       // which case the element is presumably left in place - confirm.
       var opening = "<" + name + "[^>]*>"
       var inner = "([^<]|<[^/]|</[^" + name[0] + "])*"
       var closing = "</" + name + ">"
       stripped = Regex.new(opening + inner + closing, "gi").replaceAll(stripped, "")
     }
     // Any tag that is still present becomes a single space so that adjacent
     // words do not run together.
     stripped = Regex.new("<[^>]+>", "g").replaceAll(stripped, " ")
     return Regex.new("[ \t\n\r\f]+", "g").replaceAll(stripped, " ").trim()
   }
 }
 // Derives SEO metadata (keywords, description, title) from a rendered page.
 class SEOGenerator {
   // Words that are never reported as keywords.
   static STOP_WORDS {
     return [
       "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
       "of", "with", "by", "from", "as", "is", "was", "are", "were", "been",
       "be", "have", "has", "had", "do", "does", "did", "will", "would",
       "could", "should", "may", "might", "must", "shall", "can", "need",
       "this", "that", "these", "those", "it", "its", "they", "them",
       "we", "us", "you", "your", "he", "she", "him", "her", "i", "my",
       "if", "then", "else", "when", "where", "why", "how", "what", "which",
       "who", "whom", "not", "no", "yes", "all", "any", "both", "each",
       "more", "most", "other", "some", "such", "only", "same", "so",
       "than", "too", "very", "just", "also", "now", "here", "there"
     ]
   }

   // Words whose weight is doubled when ranking keywords.
   static WREN_TERMS {
     return [
       "wren", "fiber", "class", "method", "module", "import", "foreign",
       "static", "construct", "scheduler", "async", "await", "cli", "api",
       "json", "http", "websocket", "sqlite", "crypto", "tls", "regex"
     ]
   }

   // Returns up to `maxKeywords` words from `text`, heaviest first.
   //
   // A word's weight is its number of occurrences in the lower-cased text,
   // multiplied by 3 when the word also occurs in `title` and by 2 when it is
   // listed in WREN_TERMS (both factors apply when both conditions hold).
   // Words in STOP_WORDS are ignored. The order of equally weighted words is
   // unspecified.
   static extractKeywords(text, title, maxKeywords) {
     var wordPattern = Regex.new("\\b[a-zA-Z][a-zA-Z0-9_]{2,}\\b", "g")
     var matches = wordPattern.matchAll(Str.toLower(text))
     var freq = {}
     var stopWords = SEOGenerator.STOP_WORDS
     for (match in matches) {
       var word = match.text
       if (word.count > 2 && !stopWords.contains(word)) {
         if (freq.containsKey(word)) {
           freq[word] = freq[word] + 1
         } else {
           freq[word] = 1
         }
       }
     }
     var titleMatches = wordPattern.matchAll(Str.toLower(title))
     var titleWords = []
     for (m in titleMatches) titleWords.add(m.text)
     var wrenTerms = SEOGenerator.WREN_TERMS
     // Only values of existing keys are rewritten here; no key is added or
     // removed while the map is being iterated.
     for (word in freq.keys) {
       if (titleWords.contains(word)) freq[word] = freq[word] * 3
       if (wrenTerms.contains(word)) freq[word] = freq[word] * 2
     }
     var sorted = freq.keys.toList
     // List.sort expects a comparer that answers "does a come before b?" with
     // a Bool. A numeric difference is always truthy in Wren (0 included), so
     // it must not be used here: compare the weights explicitly instead.
     sorted.sort {|a, b| freq[a] > freq[b] }
     var result = []
     var count = 0
     for (word in sorted) {
       if (count >= maxKeywords) break
       result.add(word)
       count = count + 1
     }
     return result
   }

   // Builds a meta description of at most `maxLength` bytes from `text`.
   //
   // Known page chrome ("Skip to main content", "Menu", the "Wren-CLI vX.Y"
   // banner and the trailing "Previous: ... Next: ..." navigation) is removed,
   // the rest is split into sentences, and sentences that are 20 bytes or
   // shorter or that start with "import ", "var " or "//" are discarded.
   // When nothing is left, a generic description based on `title` is returned.
   // All lengths are measured in bytes.
   static generateDescription(text, title, maxLength) {
     var ws = Regex.new("\\s+", "g")
     text = ws.replaceAll(text, " ").trim()
     var skipPattern = Regex.new("^Skip to main content\\s*", "")
     text = skipPattern.replaceAll(text, "")
     var menuPattern = Regex.new("^Menu\\s+", "")
     text = menuPattern.replaceAll(text, "")
     var versionPattern = Regex.new("Wren-CLI\\s+v[\\d.]+\\s*", "g")
     text = versionPattern.replaceAll(text, "")
     var navPattern = Regex.new("Previous:.*?Next:.*$", "")
     text = navPattern.replaceAll(text, "")
     text = text.trim()
     var sentencePattern = Regex.new("[.!?][ \t\n\r\f]+", "g")
     var sentences = sentencePattern.split(text)
     var filtered = []
     for (s in sentences) {
       var sLen = s.bytes.count
       if (sLen > 20 && !s.startsWith("import ") && !s.startsWith("var ") && !s.startsWith("//")) {
         filtered.add(s)
       }
     }
     if (filtered.isEmpty) return "%(title) - Wren-CLI documentation and reference."
     var description = filtered[0]
     var descLen = description.bytes.count
     if (descLen > maxLength) {
       // Too long: cut to leave room for "...", then back up to the last
       // space so the text does not end in the middle of a word.
       description = description[0...(maxLength - 3)]
       descLen = description.bytes.count
       var lastSpace = descLen - 1
       while (lastSpace > 0 && description[lastSpace] != " ") lastSpace = lastSpace - 1
       if (lastSpace > 0) description = description[0...lastSpace]
       description = description + "..."
     } else if (descLen < 80 && filtered.count > 1) {
       // Short first sentence: keep appending the following sentences, in
       // order, until the next one would exceed maxLength.
       var i = 1
       var fCount = filtered.count
       while (i < fCount) {
         var addLen = filtered[i].bytes.count
         if (descLen + addLen + 1 <= maxLength) {
           description = description + " " + filtered[i]
           descLen = descLen + addLen + 1
         } else {
           break
         }
         i = i + 1
       }
     }
     return description
   }

   // Returns the page title found in `html`.
   //
   // The text of the first <h1> is preferred. Otherwise the <title> element is
   // used, keeping only the part before the first " - " separator. When
   // neither is found, "Wren-CLI Documentation" is returned.
   static extractTitle(html) {
     var h1Pattern = Regex.new("<h1[^>]*>([^<]+)</h1>", "i")
     var match = h1Pattern.match(html)
     if (match) return match.group(1).trim()
     var titlePattern = Regex.new("<title[^>]*>([^<]+)</title>", "i")
     match = titlePattern.match(html)
     if (match) {
       var title = match.group(1).trim()
       if (title.contains(" - ")) {
         var parts = title.split(" - ")
         return parts[0]
       }
       return title
     }
     return "Wren-CLI Documentation"
   }
 }
 // Normalises manual template files in place and remembers which files it
 // rewrote.
 class TemplateFormatter {
   // Leading whitespace given to re-indented article lines.
   static INDENT { "                " }

   construct new() {
     _fixes = []
   }

   // Paths (as strings) of the files rewritten so far.
   fixes { _fixes }

   // Runs every fixer over the text of `path`. The file is written back and
   // recorded only when the text changed.
   // Returns true when the file was rewritten, false otherwise.
   formatFile(path, section) {
     var before = path.readText()
     var after = ensureAuthorComment_(before)
     after = fixArticleIndentation_(after)
     after = fixNavigationUrls_(after, section)
     if (after == before) return false
     path.writeText(after)
     _fixes.add(path.toString)
     return true
   }

   // Prepends the author comment unless `content` already begins with one.
   ensureAuthorComment_(content) {
     if (content.startsWith("{# retoor")) return content
     return "{# retoor <retoor@molodetz.nl> #}" + "\n" + content
   }

   // Re-indents unindented HTML lines inside the article block.
   //
   // Between the line containing the "block article" tag and the next line
   // containing an "endblock" tag, every non-empty line that does not start
   // with a space and whose trimmed text begins with "<" (but not "<!") is
   // replaced by INDENT followed by the trimmed text. All other lines are
   // kept unchanged.
   fixArticleIndentation_(content) {
     var output = []
     var insideArticle = false
     for (raw in content.split("\n")) {
       var current = raw
       if (raw.contains("{\x25 block article \x25}")) {
         insideArticle = true
       } else if (insideArticle && raw.contains("{\x25 endblock \x25}")) {
         insideArticle = false
       } else if (insideArticle && !raw.isEmpty && !raw.startsWith(" ")) {
         var body = raw.trim()
         if (body.startsWith("<") && !body.startsWith("<!")) {
           current = TemplateFormatter.INDENT + body
         }
       }
       output.add(current)
     }
     return output.join("\n")
   }

   // Currently a no-op: returns `content` unchanged and ignores `section`.
   fixNavigationUrls_(content, section) {
     return content
   }

   // Prints the list of rewritten files; prints nothing when there are none.
   report() {
     if (_fixes.isEmpty) return
     System.print("  Auto-formatted %(_fixes.count) file(s):")
     for (fixed in _fixes) System.print("    %(fixed)")
   }
 }
 // Renders the manual from "manual_src" into "bin/manual" below the current
 // working directory.
 class ManualBuilder {
   // Resolves the source and output directories, loads the site and
   // navigation YAML files, and sets up a template environment that looks in
   // "manual_src/templates" first and in "manual_src" second.
   construct new() {
     _root = Path.new(Process.cwd)
     _src = _root / "manual_src"
     _output = _root / "bin" / "manual"
     _site = Yaml.parse((_src / "data/site.yaml").readText())
     _nav = Yaml.parse((_src / "data/navigation.yaml").readText())
     var templatesLoader = FileSystemLoader.new((_src / "templates").toString)
     var pagesLoader = FileSystemLoader.new(_src.toString)
     _env = Environment.new(ChoiceLoader.new([templatesLoader, pagesLoader]))
     // Empty until build() has produced the search index.
     _searchIndexJson = ""
   }

   // Runs the whole build: wipes the output directory, renders all pages,
   // builds the search index from the rendered output, renders all pages a
   // second time so they receive the index JSON, and copies static files.
   build() {
     System.print("[DEBUG] Starting build...")
     System.print("[DEBUG] Checking output exists...")
     if (_output.exists()) {
       System.print("[DEBUG] Removing old output...")
       _output.rmtree()
     }
     System.print("[DEBUG] Creating output directory...")
     _output.mkdir(true)
     System.print("[DEBUG] Building pages...")
     buildPages()
     System.print("[DEBUG] Building search index...")
     var searchIndex = buildSearchIndex()
     System.print("[DEBUG] Converting search index to JSON...")
     _searchIndexJson = escapeJsonForHtml_(Json.stringify(searchIndex))
     System.print("[DEBUG] Rebuilding pages with index...")
     rebuildPagesWithIndex()
     System.print("[DEBUG] Copying static files...")
     copyStatic()
     System.print("Built manual to %(_output)")
   }

   // Template formatting is disabled; this only logs that it was skipped.
   formatTemplates() {
     System.print("[DEBUG] formatTemplates: skipped (slow in Wren)")
   }

   // Renders every "*.html" file found below "manual_src/pages".
   buildPages() {
     var pagesDir = _src / "pages"
     for (htmlFile in pagesDir.rglob("*.html")) {
       var relPath = htmlFile.relativeTo(pagesDir)
       buildPage(htmlFile, relPath)
     }
   }

   // Second rendering pass. It performs exactly the same work as
   // buildPages(); the only difference is that by the time build() calls it,
   // the search index JSON has been stored and is therefore part of the
   // template context.
   rebuildPagesWithIndex() {
     buildPages()
   }

   // Renders one page and writes it to the matching path below the output
   // directory.
   //
   // The template is rendered twice: first with an empty "seo" map to obtain
   // the page's own text and title, then again with the SEO values derived
   // from that first rendering.
   // `srcPath` is not read here; the template is looked up through the
   // environment as "pages/<relPath>".
   buildPage(srcPath, relPath) {
     System.print("[DEBUG] buildPage: %(relPath)")
     var templatePath = "pages/" + relPath.toString
     System.print("[DEBUG]   Getting template: %(templatePath)")
     var template = _env.getTemplate(templatePath)
     System.print("[DEBUG]   Template loaded")
     // Number of directories between the page and the output root; used to
     // build a relative prefix back to the root.
     var depth = relPath.parts.count - 1
     var staticPrefix = depth > 0 ? ("../" * depth) : "./"
     var context = {
       "current_path": relPath.toString,
       "static_prefix": staticPrefix,
       "depth": depth,
       "seo": {},
       "site": _site,
       "nav": _nav,
       "search_index_json": _searchIndexJson
     }
     var html = template.render(context)
     var text = TextExtractor.extract(html)
     var title = SEOGenerator.extractTitle(html)
     var relStr = relPath.toString
     var ogType = (relStr.contains("api/") || relStr.contains("tutorials/")) ? "article" : "website"
     var seo = {
       "keywords": SEOGenerator.extractKeywords(text, title, 10),
       "description": SEOGenerator.generateDescription(text, title, 155),
       "og_title": "%(title) - Wren-CLI",
       "og_type": ogType
     }
     context["seo"] = seo
     html = template.render(context)
     var outPath = _output / relPath
     outPath.parent.mkdir(true)
     outPath.writeText(html)
     System.print("  %(relPath)")
   }

   // Copies every file below "manual_src/static" to the same relative path
   // below the output directory.
   copyStatic() {
     var staticSrc = _src / "static"
     for (entry in staticSrc.walk()) {
       // Element 0 is used as the directory and element 2 as its file names.
       var root = entry[0]
       var files = entry[2]
       for (f in files) {
         var item = root / f
         var rel = item.relativeTo(staticSrc)
         var dest = _output / rel
         dest.parent.mkdir(true)
         item.copyfile(dest)
       }
     }
   }

   // Builds the search index from the navigation data, writes it to
   // "search-index.json" in the output directory, and returns it.
   //
   // Each page entry carries the url, title, section title, description,
   // methods and up to the first 500 space-separated words of the rendered
   // page text. The content is empty when the rendered file does not exist.
   buildSearchIndex() {
     var index = {"pages": []}
     for (section in _nav["sections"]) {
       var sectionTitle = section["title"]
       var sectionDir = section["directory"]
       var pages = section["pages"]
       if (pages == null) pages = []
       for (page in pages) {
         var url = "%(sectionDir)/%(page["file"]).html"
         var renderedPath = _output / url
         var content = ""
         if (renderedPath.exists()) {
           var html = renderedPath.readText()
           content = TextExtractor.extract(html)
           var words = content.split(" ")
           if (words.count > 500) {
             content = words[0...500].join(" ")
           }
         }
         var description = page["description"]
         if (description == null) description = ""
         var methods = page["methods"]
         if (methods == null) methods = []
         index["pages"].add({
           "url": url,
           "title": page["title"],
           "section": sectionTitle,
           "description": description,
           "methods": methods,
           "content": content
         })
       }
     }
     (_output / "search-index.json").writeText(Json.stringify(index, 2))
     return index
   }

   // Rewrites the sequences "</" and "<!--" in a JSON string so that they no
   // longer appear literally.
   // NOTE(review): presumably the result is embedded in a <script> element by
   // the templates - confirm against the template that uses
   // "search_index_json".
   //
   // "</" becomes "<\/" and "<!--" becomes "\u003c!--". Both are legal escapes
   // in JSON and in JavaScript string literals and decode to the original
   // characters. ("<\!--" must not be used: "\!" is not a valid JSON escape.)
   escapeJsonForHtml_(jsonString) {
     return jsonString.replace("</", "<\\/").replace("<!--", "\\u003c!--")
   }
 }
 // Script entry point: create the builder (which loads the site and
 // navigation data from the current working directory) and run a full build.
 var builder = ManualBuilder.new()
 builder.build()