// retoor <retoor@molodetz.nl>
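
// Builds the Wren-CLI manual: renders Jinja page templates, derives SEO
// metadata (keywords, descriptions, Open Graph tags) from the rendered
// HTML, and emits a client-side search index.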
import "io" for File, Directory
|
||
|
|
import "os" for Process
|
||
|
|
import "pathlib" for Path
|
||
|
|
import "yaml" for Yaml
|
||
|
|
import "json" for Json
|
||
|
|
import "jinja" for Environment, FileSystemLoader, ChoiceLoader
|
||
|
|
import "regex" for Regex
|
||
|
|
import "strutil" for Str
|
||
|
|
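
// Reduces rendered HTML to plain text: drops non-content elements
// (scripts, styles, navigation chrome), strips the remaining tags, and
// collapses whitespace. Used for keyword extraction and the search index.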
class TextExtractor {
  static SKIP_TAGS { ["script", "style", "nav", "head", "header", "footer", "aside"] }

  static extract(html) {
    var text = html
    for (tag in TextExtractor.SKIP_TAGS) {
      var pattern = Regex.new("<" + tag + "[^>]*>([^<]|<[^/]|</[^" + tag[0] + "])*</" + tag + ">", "gi")
      text = pattern.replaceAll(text, "")
    }
    var tagPattern = Regex.new("<[^>]+>", "g")
    text = tagPattern.replaceAll(text, " ")
    var whitespace = Regex.new("[ \t\n\r\f]+", "g")
    text = whitespace.replaceAll(text, " ")
    return text.trim()
  }
}
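
// Derives SEO metadata from a page's text: keywords by word frequency,
// a meta description from the leading sentences, and the page title.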
class SEOGenerator {
  static STOP_WORDS {
    return [
      "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
      "of", "with", "by", "from", "as", "is", "was", "are", "were", "been",
      "be", "have", "has", "had", "do", "does", "did", "will", "would",
      "could", "should", "may", "might", "must", "shall", "can", "need",
      "this", "that", "these", "those", "it", "its", "they", "them",
      "we", "us", "you", "your", "he", "she", "him", "her", "i", "my",
      "if", "then", "else", "when", "where", "why", "how", "what", "which",
      "who", "whom", "not", "no", "yes", "all", "any", "both", "each",
      "more", "most", "other", "some", "such", "only", "same", "so",
      "than", "too", "very", "just", "also", "now", "here", "there"
    ]
  }

  static WREN_TERMS {
    return [
      "wren", "fiber", "class", "method", "module", "import", "foreign",
      "static", "construct", "scheduler", "async", "await", "cli", "api",
      "json", "http", "websocket", "sqlite", "crypto", "tls", "regex"
    ]
  }
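
  // Counts word frequencies (ignoring stop words), boosts words that also
  // appear in the page title (x3) or the Wren domain vocabulary (x2), and
  // returns the top maxKeywords words.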
  static extractKeywords(text, title, maxKeywords) {
    var wordPattern = Regex.new("\\b[a-zA-Z][a-zA-Z0-9_]{2,}\\b", "g")
    var matches = wordPattern.matchAll(Str.toLower(text))
    var freq = {}
    var stopWords = SEOGenerator.STOP_WORDS
    for (match in matches) {
      var word = match.text
      if (word.count > 2 && !stopWords.contains(word)) {
        if (freq.containsKey(word)) {
          freq[word] = freq[word] + 1
        } else {
          freq[word] = 1
        }
      }
    }
    var titleMatches = wordPattern.matchAll(Str.toLower(title))
    var titleWords = []
    for (m in titleMatches) titleWords.add(m.text)
    var wrenTerms = SEOGenerator.WREN_TERMS
    for (word in freq.keys) {
      if (titleWords.contains(word)) freq[word] = freq[word] * 3
      if (wrenTerms.contains(word)) freq[word] = freq[word] * 2
    }
    var sorted = freq.keys.toList
    // Wren sort comparators must return a Bool (a Num like freq[b] - freq[a]
    // is always truthy), so compare explicitly for descending frequency.
    sorted.sort {|a, b| freq[a] > freq[b] }
    var result = []
    var count = 0
    for (word in sorted) {
      if (count >= maxKeywords) break
      result.add(word)
      count = count + 1
    }
    return result
  }
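
  // Builds a meta description: strips navigation boilerplate, keeps
  // sentence-like fragments longer than 20 bytes that don't look like code,
  // then truncates at a word boundary or appends further sentences to
  // approach maxLength.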
  static generateDescription(text, title, maxLength) {
    var ws = Regex.new("\\s+", "g")
    text = ws.replaceAll(text, " ").trim()
    var skipPattern = Regex.new("^Skip to main content\\s*", "")
    text = skipPattern.replaceAll(text, "")
    var menuPattern = Regex.new("^Menu\\s+", "")
    text = menuPattern.replaceAll(text, "")
    var versionPattern = Regex.new("Wren-CLI\\s+v[\\d.]+\\s*", "g")
    text = versionPattern.replaceAll(text, "")
    var navPattern = Regex.new("Previous:.*?Next:.*$", "")
    text = navPattern.replaceAll(text, "")
    text = text.trim()
    var sentencePattern = Regex.new("[.!?][ \t\n\r\f]+", "g")
    var sentences = sentencePattern.split(text)
    var filtered = []
    for (s in sentences) {
      var sLen = s.bytes.count
      if (sLen > 20 && !s.startsWith("import ") && !s.startsWith("var ") && !s.startsWith("//")) {
        filtered.add(s)
      }
    }
    if (filtered.isEmpty) return "%(title) - Wren-CLI documentation and reference."
    var description = filtered[0]
    var descLen = description.bytes.count
    if (descLen > maxLength) {
      description = description[0...(maxLength - 3)]
      descLen = description.bytes.count
      var lastSpace = descLen - 1
      while (lastSpace > 0 && description[lastSpace] != " ") lastSpace = lastSpace - 1
      if (lastSpace > 0) description = description[0...lastSpace]
      description = description + "..."
    } else if (descLen < 80 && filtered.count > 1) {
      var i = 1
      var fCount = filtered.count
      while (i < fCount) {
        var addLen = filtered[i].bytes.count
        if (descLen + addLen + 1 <= maxLength) {
          description = description + " " + filtered[i]
          descLen = descLen + addLen + 1
        } else {
          break
        }
        i = i + 1
      }
    }
    return description
  }
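
  // Prefers the first <h1>; falls back to the <title> (trimmed at " - "),
  // then to a generic default.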
  static extractTitle(html) {
    var h1Pattern = Regex.new("<h1[^>]*>([^<]+)</h1>", "i")
    var match = h1Pattern.match(html)
    if (match) return match.group(1).trim()
    var titlePattern = Regex.new("<title[^>]*>([^<]+)</title>", "i")
    match = titlePattern.match(html)
    if (match) {
      var title = match.group(1).trim()
      if (title.contains(" - ")) {
        var parts = title.split(" - ")
        return parts[0]
      }
      return title
    }
    return "Wren-CLI Documentation"
  }
}
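
// Normalizes the Jinja source templates in place: prepends the author
// comment, re-indents top-level tags inside the article block, and records
// every file it touched.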
class TemplateFormatter {
  static INDENT { " " }

  construct new() {
    _fixes = []
  }

  fixes { _fixes }
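
  // Runs every fixer over one template; rewrites the file only if anything
  // changed and returns whether it did.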
  formatFile(path, section) {
    var content = path.readText()
    var original = content
    content = ensureAuthorComment_(content)
    content = fixArticleIndentation_(content)
    content = fixNavigationUrls_(content, section)
    if (content != original) {
      path.writeText(content)
      _fixes.add(path.toString)
      return true
    }
    return false
  }

  ensureAuthorComment_(content) {
    if (!content.startsWith("{# retoor")) {
      return "{# retoor <retoor@molodetz.nl> #}" + "\n" + content
    }
    return content
  }
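
  // Indents unindented top-level tags between {% block article %} and
  // {% endblock %}; lines starting with "<!" (comments, doctypes) are left
  // alone.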
  fixArticleIndentation_(content) {
    var lines = content.split("\n")
    var result = []
    var inArticle = false
    for (line in lines) {
      if (line.contains("{\x25 block article \x25}")) {
        inArticle = true
        result.add(line)
        continue
      }
      if (line.contains("{\x25 endblock \x25}") && inArticle) {
        inArticle = false
        result.add(line)
        continue
      }
      if (inArticle && !line.isEmpty && !line.startsWith(" ")) {
        var stripped = line.trim()
        if (stripped.startsWith("<") && !stripped.startsWith("<!")) {
          line = TemplateFormatter.INDENT + stripped
        }
      }
      result.add(line)
    }
    return result.join("\n")
  }

  fixNavigationUrls_(content, section) {
    // Placeholder: navigation URLs are currently left untouched.
    return content
  }

  report() {
    if (!_fixes.isEmpty) {
      System.print(" Auto-formatted %(_fixes.count) file(s):")
      for (f in _fixes) {
        System.print(" %(f)")
      }
    }
  }
}
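
// Orchestrates the build: loads the site and navigation data, sets up the
// Jinja environment, renders every page (twice; see build()), and copies
// static assets into bin/manual.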
class ManualBuilder {
  construct new() {
    _root = Path.new(Process.cwd)
    _src = _root / "manual_src"
    _output = _root / "bin" / "manual"
    _site = Yaml.parse((_src / "data/site.yaml").readText())
    _nav = Yaml.parse((_src / "data/navigation.yaml").readText())
    var templatesLoader = FileSystemLoader.new((_src / "templates").toString)
    var pagesLoader = FileSystemLoader.new(_src.toString)
    _env = Environment.new(ChoiceLoader.new([templatesLoader, pagesLoader]))
    _searchIndexJson = ""
  }
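
  // Two-pass build: pages are rendered once so their text can seed the
  // search index, then re-rendered with the index JSON embedded.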
  build() {
    System.print("[DEBUG] Starting build...")
    System.print("[DEBUG] Checking output exists...")
    if (_output.exists()) {
      System.print("[DEBUG] Removing old output...")
      _output.rmtree()
    }
    System.print("[DEBUG] Creating output directory...")
    _output.mkdir(true)
    System.print("[DEBUG] Building pages...")
    buildPages()
    System.print("[DEBUG] Building search index...")
    var searchIndex = buildSearchIndex()
    System.print("[DEBUG] Converting search index to JSON...")
    _searchIndexJson = escapeJsonForHtml_(Json.stringify(searchIndex))
    System.print("[DEBUG] Rebuilding pages with index...")
    rebuildPagesWithIndex()
    System.print("[DEBUG] Copying static files...")
    copyStatic()
    System.print("Built manual to %(_output)")
  }

  formatTemplates() {
    System.print("[DEBUG] formatTemplates: skipped (slow in Wren)")
  }

  buildPages() {
    var pagesDir = _src / "pages"
    for (htmlFile in pagesDir.rglob("*.html")) {
      var relPath = htmlFile.relativeTo(pagesDir)
      buildPage(htmlFile, relPath)
    }
  }
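
  // Second pass: identical to buildPages(), but by now _searchIndexJson is
  // populated, so the rendered pages embed the search index.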
  rebuildPagesWithIndex() {
    var pagesDir = _src / "pages"
    for (htmlFile in pagesDir.rglob("*.html")) {
      var relPath = htmlFile.relativeTo(pagesDir)
      buildPage(htmlFile, relPath)
    }
  }
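
  // Renders a page twice: first with empty SEO data to obtain its plain
  // text and title, then again with the derived keywords and description
  // filled in.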
  buildPage(srcPath, relPath) {
    System.print("[DEBUG] buildPage: %(relPath)")
    var templatePath = "pages/" + relPath.toString
    System.print("[DEBUG] Getting template: %(templatePath)")
    var template = _env.getTemplate(templatePath)
    System.print("[DEBUG] Template loaded")
    var depth = relPath.parts.count - 1
    var staticPrefix = depth > 0 ? ("../" * depth) : "./"
    var context = {
      "current_path": relPath.toString,
      "static_prefix": staticPrefix,
      "depth": depth,
      "seo": {},
      "site": _site,
      "nav": _nav,
      "search_index_json": _searchIndexJson
    }
    var html = template.render(context)
    var text = TextExtractor.extract(html)
    var title = SEOGenerator.extractTitle(html)
    var relStr = relPath.toString
    var ogType = (relStr.contains("api/") || relStr.contains("tutorials/")) ? "article" : "website"
    var seo = {
      "keywords": SEOGenerator.extractKeywords(text, title, 10),
      "description": SEOGenerator.generateDescription(text, title, 155),
      "og_title": "%(title) - Wren-CLI",
      "og_type": ogType
    }
    context["seo"] = seo
    html = template.render(context)
    var outPath = _output / relPath
    outPath.parent.mkdir(true)
    outPath.writeText(html)
    System.print(" %(relPath)")
  }

  copyStatic() {
    var staticSrc = _src / "static"
    for (entry in staticSrc.walk()) {
      var root = entry[0]
      var files = entry[2]
      for (f in files) {
        var item = root / f
        var rel = item.relativeTo(staticSrc)
        var dest = _output / rel
        dest.parent.mkdir(true)
        item.copyfile(dest)
      }
    }
  }
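
  // Walks the navigation data, extracts up to 500 words of text from each
  // rendered page, and writes search-index.json alongside the pages.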
  buildSearchIndex() {
    var index = {"pages": []}
    for (section in _nav["sections"]) {
      var sectionTitle = section["title"]
      var sectionDir = section["directory"]
      var pages = section["pages"]
      if (pages == null) pages = []
      for (page in pages) {
        var url = "%(sectionDir)/%(page["file"]).html"
        var renderedPath = _output / url
        var content = ""
        if (renderedPath.exists()) {
          var html = renderedPath.readText()
          content = TextExtractor.extract(html)
          var words = content.split(" ")
          if (words.count > 500) {
            content = words[0...500].join(" ")
          }
        }
        var description = page["description"]
        if (description == null) description = ""
        var methods = page["methods"]
        if (methods == null) methods = []
        index["pages"].add({
          "url": url,
          "title": page["title"],
          "section": sectionTitle,
          "description": description,
          "methods": methods,
          "content": content
        })
      }
    }
    (_output / "search-index.json").writeText(Json.stringify(index, 2))
    return index
  }
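
  // Escapes "</" and "<!--" so the JSON can be inlined in a <script> block
  // without terminating it prematurely.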
  escapeJsonForHtml_(jsonString) {
    return jsonString.replace("</", "<\\/").replace("<!--", "<\\!--")
  }
}

var builder = ManualBuilder.new()
builder.build()