|
|
Line 1: |
Line 1: |
| local p = {} | | local p = {} |
|
| |
|
-- Escape special wikitext characters.
-- Wikitext has no backslash escape, so each markup character is replaced by
-- its numeric HTML entity, which MediaWiki renders literally.
-- @param text string|nil  raw text
-- @return string          escaped text ("" when text is nil)
local function escapeWikitext(text)
    if not text then return "" end
    local entities = {
        ["["] = "&#91;",
        ["]"] = "&#93;",
        ["{"] = "&#123;",
        ["}"] = "&#125;",
        ["|"] = "&#124;",
        ["="] = "&#61;",
    }
    -- Parentheses drop gsub's second return value (the match count).
    return (text:gsub("[%[%]{}|=]", entities))
end

-- Utility: unescape common HTML entities.
-- NOTE(review): the entity names below were reconstructed; the rendered diff
-- this block came from had decoded them into their literal characters.
-- @param s string|nil  text containing entities / JSON-style \u003C escapes
-- @return string       decoded text ("" when s is nil)
local function htmlUnescape(s)
    if not s then return "" end
    -- Literal "\u003C" / "\u003E" sequences (backslash is not magic in patterns).
    s = s:gsub("\\u003[Cc]", "<")
    s = s:gsub("\\u003[Ee]", ">")
    s = s:gsub("&lt;", "<")
    s = s:gsub("&gt;", ">")
    s = s:gsub("&quot;", '"')
    s = s:gsub("&#39;", "'")
    s = s:gsub("&apos;", "'")
    -- Decode "&amp;" last so that "&amp;lt;" becomes "&lt;" rather than "<".
    s = s:gsub("&amp;", "&")
    return s
end
|
| |
|
-- Convert ATX headers ("# Header") to wikitext headings, line by line.
-- One "#" maps to "==" (level + 1); the result is capped at six "=" signs,
-- the deepest heading wikitext supports. Lines with no heading text are
-- left untouched.
-- @param text string|nil  markdown text
-- @return string          text with header lines rewritten
local function convertHeaders(text)
    if not text or text == "" then return "" end

    local out = {}
    -- Appending "\n" and consuming it in the pattern yields exactly one
    -- match per line on every Lua version (no spurious empty matches).
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local hashes, content = line:match("^(#+)%s*(.-)%s*#*$")
        if hashes and content ~= "" then
            local bar = string.rep("=", math.min(#hashes, 5) + 1)
            out[#out + 1] = bar .. " " .. content .. " " .. bar
        else
            out[#out + 1] = line
        end
    end

    return table.concat(out, "\n")
end

-- Utility: strip inline HTML tags (like iframe, br, strong, etc).
-- @param s string|nil  text possibly containing HTML
-- @return string       text with iframes (including content) and all tags removed
local function stripHTML(s)
    if not s then return "" end
    -- Remove <iframe ...>...</iframe> including its content
    s = s:gsub("<iframe.-</iframe>", "")
    -- Remove all remaining HTML tags
    s = s:gsub("<.->", "")
    return s
end
|
| |
|
| -- Convert emphasis and strong | | -- Main converter |
| local function convertEmphasis(text)
| | function p.convertMarkdownToWikitext(markdown) |
| -- Convert **bold** and __bold__
| | if not markdown then return "(No content)" end |
| text = text:gsub("%*%*(.-)%*%*", "'''%1'''")
| |
| text = text:gsub("__(.-)__", "'''%1'''")
| |
|
| |
| -- Convert *italic* and _italic_
| |
| text = text:gsub("([^%*])%*([^%*].-[^%*])%*([^%*])", "%1''%2''%3")
| |
| text = text:gsub("^%*([^%*].-[^%*])%*([^%*])", "''%1''%2")
| |
| text = text:gsub("([^%*])%*([^%*].-[^%*])%*$", "%1''%2''")
| |
| text = text:gsub("^%*([^%*].-[^%*])%*$", "''%1''")
| |
|
| |
| text = text:gsub("([^_])_([^_].-[^_])_([^_])", "%1''%2''%3")
| |
| text = text:gsub("^_([^_].-[^_])_([^_])", "''%1''%2")
| |
| text = text:gsub("([^_])_([^_].-[^_])_$", "%1''%2''")
| |
| text = text:gsub("^_([^_].-[^_])_$", "''%1''")
| |
|
| |
| return text
| |
| end
| |
| | |
-- Convert markdown links to wikitext external links.
--   [text](url)      ->  [url text]
--   <http(s)://url>  ->  [url]
-- Image syntax (![alt](url)) is deliberately left untouched so that the
-- image converter can still recognise it afterwards.
-- @param text string|nil  markdown text
-- @return string          text with links rewritten
local function convertLinks(text)
    if not text then return "" end

    -- The label may not contain brackets, so an earlier unrelated "[x]" on
    -- the same line cannot be swallowed into the link text.
    text = text:gsub("(!?)%[([^%[%]]*)%]%(([^%)\n]*)%)", function(bang, label, url)
        if bang == "!" then
            return nil -- an image: keep the original match
        end
        -- Drop an optional markdown title: [text](url "title")
        url = url:gsub('%s+"[^"]*"%s*$', "")
        return "[" .. url .. " " .. label .. "]"
    end)

    -- Convert <url> autolinks
    text = text:gsub("<(https?://[^>]+)>", "[%1]")

    return text
end
| |
| | |
-- Convert markdown images to wikitext file embeds.
--   ![alt](target)          ->  [[File:target|thumb|alt]]
--   ![alt](target "title")  ->  [[File:target|thumb|alt]]  (title dropped)
-- @param text string|nil  markdown text
-- @return string          text with images rewritten
local function convertImages(text)
    if not text then return "" end

    text = text:gsub("!%[([^%[%]]*)%]%(([^%)\n]*)%)", function(alt, target)
        -- An optional quoted title is not part of the file name.
        target = target:gsub('%s+"[^"]*"%s*$', "")
        target = target:match("^%s*(.-)%s*$")
        return "[[File:" .. target .. "|thumb|" .. alt .. "]]"
    end)

    return text
end
| |
| | |
-- Convert code blocks and inline code.
--   * Fenced blocks (```lang ... ```) become <syntaxhighlight lang="..."> when
--     a language is given, otherwise <pre>. The fence lines must start at
--     column 0; an opening fence with no closing fence is left as plain text.
--   * Runs of lines indented by 4+ spaces become <pre> blocks with the first
--     four spaces removed. Blank lines inside a run are kept; trailing blank
--     lines are emitted after the closing tag.
--   * `inline code` becomes <code>...</code> on lines outside code blocks.
-- The text is processed in a single line-oriented pass, so code block
-- contents are never re-interpreted by the indented or inline rules.
-- @param text string|nil  markdown text
-- @return string          converted text
local function convertCode(text)
    if not text or text == "" then return "" end

    -- Appending "\n" and consuming it in the pattern yields exactly one
    -- entry per line on every Lua version.
    local lines = {}
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        lines[#lines + 1] = line
    end

    local out = {}
    local indented = {} -- pending lines of the current indented block

    local function emit(s)
        out[#out + 1] = s
    end

    -- Write out the pending indented block, if any.
    local function flushIndented()
        if #indented == 0 then return end
        local last = #indented
        while last > 0 and indented[last] == "" do
            last = last - 1
        end
        if last > 0 then
            emit("<pre>")
            for k = 1, last do
                emit(indented[k])
            end
            emit("</pre>")
        end
        -- Trailing blank lines belong after the block, not inside it.
        for _ = last + 1, #indented do
            emit("")
        end
        indented = {}
    end

    -- Index of the first closing fence at or after `from`, or nil.
    local function findFenceEnd(from)
        for j = from, #lines do
            if lines[j]:find("^```%s*$") then
                return j
            end
        end
        return nil
    end

    local i = 1
    while i <= #lines do
        local line = lines[i]
        local info = line:match("^```(.*)$")
        local close = info and findFenceEnd(i + 1)

        if close then
            flushIndented()
            -- Only the first word of the info string is used, restricted to
            -- characters that are safe inside the lang="..." attribute.
            local lang = info:match("^%s*([%w_+#.%-]+)")
            if lang then
                emit('<syntaxhighlight lang="' .. lang .. '">')
            else
                emit("<pre>")
            end
            for j = i + 1, close - 1 do
                emit(lines[j])
            end
            emit(lang and "</syntaxhighlight>" or "</pre>")
            i = close + 1
        elseif line:find("^    ") then
            -- This line starts with 4+ spaces
            indented[#indented + 1] = line:sub(5)
            i = i + 1
        elseif #indented > 0 and line:find("^%s*$") then
            -- Empty line in code block - include it
            indented[#indented + 1] = ""
            i = i + 1
        else
            flushIndented()
            -- Parentheses drop gsub's match count.
            emit((line:gsub("`([^`\n]+)`", "<code>%1</code>")))
            i = i + 1
        end
    end

    -- Handle code block at end
    flushIndented()

    return table.concat(out, "\n")
end
| |
| | |
-- Convert markdown lists to wikitext lists.
--   "- item" / "* item" / "+ item"  ->  "* item"
--   "1. item"                        ->  "# item"
-- Every two characters of leading whitespace add one nesting level.
-- Whitespace-only lines that follow a list item are emitted as empty lines;
-- all other lines pass through unchanged. A marker with no content after it
-- (e.g. "1. ") is not a list item and is passed through.
-- @param text string|nil  markdown text
-- @return string          text with list lines rewritten
local function convertLists(text)
    if not text or text == "" then return "" end

    local out = {}
    local inList = false

    -- Appending "\n" and consuming it in the pattern yields exactly one
    -- match per line on every Lua version.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local marker = "*"
        local indent, content = line:match("^(%s*)[-*+]%s+(.+)$")
        if not indent then
            marker = "#"
            indent, content = line:match("^(%s*)%d+%.%s+(.+)$")
        end

        if indent then
            local level = math.floor(#indent / 2) + 1
            out[#out + 1] = string.rep(marker, level) .. " " .. content
            inList = true
        elseif inList and line:find("^%s*$") then
            -- Empty line in list - continue list
            out[#out + 1] = ""
        else
            inList = false
            out[#out + 1] = line
        end
    end

    return table.concat(out, "\n")
end
| |
| | |
-- Convert markdown blockquotes to <blockquote> elements.
-- Consecutive lines starting with ">" are grouped into a single
-- <blockquote>...</blockquote> block, with the ">" and any whitespace after
-- it removed from each line. All other lines pass through unchanged.
-- @param text string|nil  markdown text
-- @return string          text with quote runs wrapped
local function convertBlockquotes(text)
    if not text or text == "" then return "" end

    local out = {}
    local quoted = {} -- lines of the quote currently being collected

    -- Write out the pending quote, if any.
    local function flushQuote()
        if #quoted == 0 then return end
        out[#out + 1] = "<blockquote>"
        for _, quoteLine in ipairs(quoted) do
            out[#out + 1] = quoteLine
        end
        out[#out + 1] = "</blockquote>"
        quoted = {}
    end

    -- Appending "\n" and consuming it in the pattern yields exactly one
    -- match per line on every Lua version.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local content = line:match("^>%s*(.*)$")
        if content then
            quoted[#quoted + 1] = content
        else
            flushQuote()
            out[#out + 1] = line
        end
    end

    -- Handle blockquote at end of text
    flushQuote()

    return table.concat(out, "\n")
end
| |
| | |
-- Strip HTML from text.
--   * HTML comments (<!-- ... -->) are removed.
--   * <script>, <style> and <iframe> elements are removed with their content
--     (tag names matched case-insensitively).
--   * Every remaining tag (opening, closing, self-closing, <!DOCTYPE ...>) is
--     removed; its content is kept.
-- Markdown autolinks (<http://...>, <https://...>) and stray "<" / ">"
-- characters in prose (e.g. "1 < 2 and 3 > 2") are left untouched.
-- @param text string|nil  text possibly containing HTML
-- @return string          text without HTML ("" when text is nil)
local function stripHTML(text)
    if not text then return "" end

    -- Build a case-insensitive pattern for a tag name: "br" -> "[Bb][Rr]".
    local function anyCase(name)
        return (name:gsub("%a", function(c)
            return "[" .. c:upper() .. c:lower() .. "]"
        end))
    end

    -- Remove HTML comments; a comment ends only at the full "-->" terminator.
    text = text:gsub("<!%-%-.-%-%->", "")

    -- Remove elements whose content must not survive.
    for _, name in ipairs({ "script", "style", "iframe" }) do
        local tag = anyCase(name)
        text = text:gsub("<" .. tag .. "[^>]*>.-</" .. tag .. ">", "")
    end

    -- Remove remaining tags. "[^<>]" keeps a stray "<" in prose from
    -- swallowing a real tag that follows it.
    text = text:gsub("<[^<>]+>", function(tag)
        if tag:find("^<https?://") then
            return nil -- autolink: keep
        end
        if tag:find("^</?%a") or tag:find("^<!") then
            return ""
        end
        return nil -- not a tag (e.g. "< b and c >"): keep
    end)

    return text
end
| |
| | |
-- Encode a Unicode code point as a UTF-8 byte string.
local function encodeUtf8(cp)
    local char, floor = string.char, math.floor
    if cp < 0x80 then
        return char(cp)
    elseif cp < 0x800 then
        return char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40)
    elseif cp < 0x10000 then
        return char(0xE0 + floor(cp / 0x1000),
                    0x80 + floor(cp / 0x40) % 0x40,
                    0x80 + cp % 0x40)
    end
    return char(0xF0 + floor(cp / 0x40000),
                0x80 + floor(cp / 0x1000) % 0x40,
                0x80 + floor(cp / 0x40) % 0x40,
                0x80 + cp % 0x40)
end

-- Handle literal (textual) escape sequences.
--   \n, \r   removed entirely (not converted to line breaks)
--   \t       replaced by a single space
--   \" \' \\ replaced by the escaped character
--   \uXXXX   replaced by the UTF-8 encoding of the code point; a high/low
--            surrogate pair is combined, a lone surrogate becomes U+FFFD
-- Any other backslash is kept as-is. The text is scanned once from left to
-- right, so an escaped backslash is never re-read as the start of another
-- escape (the two-backslash sequence followed by "n" yields backslash + "n").
-- @param text string|nil  text containing escape sequences
-- @return string          decoded text ("" when text is nil)
local function handleEscapeSequences(text)
    if not text then return "" end

    local simple = {
        n = "",
        r = "",
        t = " ",
        ['"'] = '"',
        ["'"] = "'",
        ["\\"] = "\\",
    }

    local out = {}
    local pos = 1
    while true do
        local slash = text:find("\\", pos, true)
        if not slash then
            out[#out + 1] = text:sub(pos)
            break
        end
        out[#out + 1] = text:sub(pos, slash - 1)

        local code = text:sub(slash + 1, slash + 1)
        local hex = code == "u" and text:match("^%x%x%x%x", slash + 2)

        if hex then
            local cp = tonumber(hex, 16)
            pos = slash + 6
            if cp >= 0xD800 and cp <= 0xDBFF then
                -- High surrogate: combine with a directly following low one.
                local low = text:match("^\\u([dD][c-fC-F]%x%x)", pos)
                if low then
                    cp = 0x10000 + (cp - 0xD800) * 0x400 + (tonumber(low, 16) - 0xDC00)
                    pos = pos + 6
                end
            end
            if cp >= 0xD800 and cp <= 0xDFFF then
                out[#out + 1] = "\239\191\189" -- U+FFFD for an unpaired surrogate
            else
                out[#out + 1] = encodeUtf8(cp)
            end
        elseif simple[code] then
            out[#out + 1] = simple[code]
            pos = slash + 2
        else
            -- Unknown escape (or trailing backslash): keep the backslash.
            out[#out + 1] = "\\"
            pos = slash + 1
        end
    end

    return table.concat(out)
end
| |
| | |
-- Clean up extra whitespace and normalize line breaks.
--   1. CRLF and lone CR become "\n".
--   2. Trailing spaces/tabs are removed from every line.
--   3. Runs of three or more newlines collapse to a single blank line.
--   4. Leading and trailing whitespace of the whole text is trimmed.
-- Step 2 runs before step 3 so that whitespace-only lines count as blank
-- lines when the runs are collapsed.
-- @param text string|nil  raw text
-- @return string          normalized text ("" when text is nil)
local function normalizeWhitespace(text)
    if not text then return "" end

    -- Convert different line break styles to consistent \n
    text = text:gsub("\r\n?", "\n")

    -- Remove trailing spaces on lines
    text = text:gsub("[ \t]+\n", "\n")

    -- Remove excessive blank lines (more than 1 consecutive empty line)
    text = text:gsub("\n\n\n+", "\n\n")

    -- Trim leading and trailing whitespace from the entire text
    text = text:match("^%s*(.-)%s*$")

    return text
end
| |
| | |
-- Convert horizontal rules.
-- A line is a rule when, ignoring whitespace, it consists only of "-", "*"
-- and "_" characters and contains at least three of them ("---", "***",
-- "___", "- - -"). Such a line is replaced by "----". Each line is tested on
-- its own, so a rule never spans line breaks, consecutive rules are all
-- converted, and a rule that is the entire text is recognised too.
-- @param text string|nil  markdown text
-- @return string          text with rule lines replaced
local function convertHorizontalRules(text)
    if not text or text == "" then return "" end

    local out = {}
    -- Appending "\n" and consuming it in the pattern yields exactly one
    -- match per line on every Lua version.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local compact = line:gsub("%s+", "")
        if #compact >= 3 and not compact:find("[^%-%*_]") then
            out[#out + 1] = "----"
        else
            out[#out + 1] = line
        end
    end

    return table.concat(out, "\n")
end
| |
| | |
-- Convert markdown tables to wikitables.
-- Consecutive lines containing "|" are collected as a candidate table. The
-- candidate is handed to convertMarkdownTable only when its second line is a
-- delimiter row (e.g. "|---|:--:|"), which is what marks a markdown table;
-- otherwise the lines are emitted unchanged. This keeps ordinary lines that
-- merely contain a pipe, such as "[[File:x.png|thumb|alt]]", intact.
-- @param text string|nil  markdown text
-- @return string          text with tables rewritten
local function convertTables(text)
    if not text or text == "" then return "" end

    local out = {}
    local pending = {} -- candidate table lines

    -- True for a row made only of pipes, colons, dashes and whitespace that
    -- contains at least one dash.
    local function isDelimiterRow(line)
        return line:find("%-") ~= nil and line:find("^[%s|:%-]+$") ~= nil
    end

    -- Write out the pending candidate, converted when it is a real table.
    local function flushPending()
        if #pending == 0 then return end
        local rows = pending
        pending = {}
        if #rows >= 2 and isDelimiterRow(rows[2]) then
            rows = convertMarkdownTable(rows)
        end
        for _, row in ipairs(rows) do
            out[#out + 1] = row
        end
    end

    -- Appending "\n" and consuming it in the pattern yields exactly one
    -- match per line on every Lua version.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        if line:find("|", 1, true) then
            pending[#pending + 1] = line
        else
            flushPending()
            out[#out + 1] = line
        end
    end

    -- Handle table at end
    flushPending()

    return table.concat(out, "\n")
end
|
| |
|
| -- Helper function to convert markdown table to wikitable | | -- Decode escapes |
| function convertMarkdownTable(lines)
| | markdown = htmlUnescape(markdown) |
| local result = {"{| class=\"wikitable\""}
| | markdown = markdown:gsub("\\n", "\n") -- convert escaped newlines |
| local headerProcessed = false | | markdown = markdown:gsub("\\%-", "–") -- hyphen escape |
|
| | markdown = markdown:gsub("\\!", "!") -- unescape exclamation |
| for i, line in ipairs(lines) do
| | markdown = markdown:gsub("\\_", "_") -- keep underscores |
| local trimmed = line:match("^%s*(.-)%s*$")
| | markdown = markdown:gsub("\\+", "+") -- unescape pluses |
|
| | markdown = markdown:gsub("\\*", "*") -- unescape asterisks |
| -- Skip separator lines (|---|---|)
| | markdown = markdown:gsub("\\\\", "\\") -- backslashes |
| if not trimmed:match("^|?[-:%s|]+|?$") then
| |
| local cells = {}
| |
| for cell in trimmed:gmatch("|([^|]*)") do
| |
| local cleanCell = cell:match("^%s*(.-)%s*$")
| |
| table.insert(cells, cleanCell)
| |
| end
| |
|
| |
| if not headerProcessed then
| |
| -- First row is header
| |
| table.insert(result, "|-")
| |
| for j, cell in ipairs(cells) do
| |
| table.insert(result, "! " .. cell)
| |
| end
| |
| headerProcessed = true
| |
| else
| |
| -- Data row
| |
| table.insert(result, "|-")
| |
| for j, cell in ipairs(cells) do
| |
| table.insert(result, "| " .. cell)
| |
| end
| |
| end
| |
| end
| |
| end
| |
|
| |
| table.insert(result, "|}")
| |
| return result
| |
| end
| |
|
| |
|
| -- Main conversion function
| | -- Strip inline HTML |
| function p.convert(frame)
| | markdown = stripHTML(markdown) |
| local markdown = frame.args[1] or frame:getParent().args[1] or ""
| |
| local stripHtml = frame.args.striphtml or frame:getParent().args.striphtml or "true"
| |
| local handleEscapes = frame.args.escapes or frame:getParent().args.escapes or "true"
| |
|
| |
| -- Apply pre-processing
| |
| local wikitext = markdown
| |
|
| |
| -- Strip HTML if requested (default: true)
| |
| if stripHtml ~= "false" then
| |
| wikitext = stripHTML(wikitext)
| |
| end
| |
|
| |
| -- Handle escape sequences if requested (default: true)
| |
| if handleEscapes ~= "false" then
| |
| wikitext = handleEscapeSequences(wikitext)
| |
| end
| |
|
| |
| -- Normalize whitespace
| |
| wikitext = normalizeWhitespace(wikitext)
| |
|
| |
| -- Apply conversions in order
| |
|
| |
| -- Code blocks first (to protect code from other conversions)
| |
| wikitext = convertCode(wikitext)
| |
|
| |
| -- Headers
| |
| wikitext = convertHeaders(wikitext)
| |
|
| |
| -- Emphasis and strong
| |
| wikitext = convertEmphasis(wikitext)
| |
|
| |
| -- Links and images
| |
| wikitext = convertLinks(wikitext)
| |
| wikitext = convertImages(wikitext)
| |
|
| |
| -- Lists
| |
| wikitext = convertLists(wikitext)
| |
|
| |
| -- Blockquotes
| |
| wikitext = convertBlockquotes(wikitext)
| |
|
| |
| -- Tables
| |
| wikitext = convertTables(wikitext)
| |
|
| |
| -- Horizontal rules
| |
| wikitext = convertHorizontalRules(wikitext)
| |
|
| |
| return wikitext
| |
| end
| |
|
| |
|
| -- Function to just clean/strip content without markdown conversion | | -- Headings |
| function p.clean(frame)
| | markdown = markdown:gsub("\n###### (.-)\n", "\n====== %1 ======\n") |
| local text = frame.args[1] or frame:getParent().args[1] or ""
| | markdown = markdown:gsub("\n##### (.-)\n", "\n===== %1 =====\n") |
| local stripHtml = frame.args.striphtml or frame:getParent().args.striphtml or "true"
| | markdown = markdown:gsub("\n#### (.-)\n", "\n==== %1 ====\n") |
| local handleEscapes = frame.args.escapes or frame:getParent().args.escapes or "true"
| | markdown = markdown:gsub("\n### (.-)\n", "\n=== %1 ===\n") |
|
| | markdown = markdown:gsub("\n## (.-)\n", "\n== %1 ==\n") |
| local cleaned = text
| | markdown = markdown:gsub("\n# (.-)\n", "\n= %1 =\n") |
|
| |
| -- Strip HTML if requested (default: true)
| |
| if stripHtml ~= "false" then
| |
| cleaned = stripHTML(cleaned)
| |
| end
| |
|
| |
| -- Handle escape sequences if requested (default: true)
| |
| if handleEscapes ~= "false" then
| |
| cleaned = handleEscapeSequences(cleaned)
| |
| end
| |
|
| |
| -- Normalize whitespace
| |
| cleaned = normalizeWhitespace(cleaned)
| |
|
| |
| return cleaned
| |
| end
| |
|
| |
|
| -- Function to strip only HTML | | -- Bold/Italic |
| function p.stripHTML(frame)
| | markdown = markdown:gsub("%*%*%*(.-)%*%*%*", "'''''%1'''''") |
| local text = frame.args[1] or frame:getParent().args[1] or ""
| | markdown = markdown:gsub("%*%*(.-)%*%*", "'''%1'''") |
| return stripHTML(text)
| | markdown = markdown:gsub("%*(.-)%*", "''%1''") |
| end
| |
|
| |
|
| -- Function to handle only escape sequences | | -- Lists (preserve nested indenting) |
| function p.handleEscapes(frame)
| | markdown = markdown:gsub("\n[ \t]*%- ", "\n* ") |
| local text = frame.args[1] or frame:getParent().args[1] or ""
| |
| return handleEscapeSequences(text)
| |
| end
| |
|
| |
|
| -- Function to convert just specific elements | | -- Links: [text](url) |
| function p.convertHeaders(frame)
| | markdown = markdown:gsub("%[(.-)%]%((.-)%)", "[%2 %1]") |
| local text = frame.args[1] or frame:getParent().args[1] or ""
| |
| return convertHeaders(text)
| |
| end
| |
|
| |
|
| function p.convertEmphasis(frame)
| | -- Clean up excess line breaks |
| local text = frame.args[1] or frame:getParent().args[1] or ""
| | markdown = markdown:gsub("\r", "") |
| return convertEmphasis(text)
| | markdown = markdown:gsub("\n\n\n+", "\n\n") |
| end
| |
|
| |
|
| function p.convertLinks(frame)
| | return markdown |
| local text = frame.args[1] or frame:getParent().args[1] or ""
| |
| return convertLinks(text)
| |
| end | | end |
|
| |
|
-- Convert only the list syntax of the first argument.
-- The argument is read from the #invoke call, falling back to the calling
-- template's first argument, and finally to "".
function p.convertLists(frame)
    local text = frame.args[1]
    if not text then
        -- getParent() may return nil, so guard before indexing its args.
        local parent = frame:getParent()
        text = parent and parent.args[1] or ""
    end
    return convertLists(text)
end

-- Pull markdown from API via ExternalData and render it as wikitext.
-- Falls back to the text "(No data)" when the request yields no usable row.
function p.renderFromAPI(frame)
    local data = mw.ext.externalData.getData{
        url = 'https://api.vaulthunters.gg/patch-notes?limit=1',
        data = { markdown = 'text' }
    }
    -- NOTE(review): assumes a failed fetch may hand back a non-table value;
    -- confirm against the ExternalData Lua API.
    local first = type(data) == "table" and data[1] or nil
    local markdown = type(first) == "table" and first.markdown or '(No data)'
    return p.convertMarkdownToWikitext(markdown)
end
|
| |
|
| return p | | return p |