More actions
Created page with "local p = {} -- Escape special wikitext characters local function escapeWikitext(text) if not text then return "" end -- Escape wikitext special characters text = text:gsub("([%[%]{}|=])", "\\%1") return text end -- Convert headers local function convertHeaders(text) -- Convert ATX headers (# Header) text = text:gsub("^(#+)%s*(.-)%s*#*$", function(hashes, content) local level = #hashes if level > 6 then level = 6 end loca..." |
mNo edit summary |
||
Line 170: | Line 170: | ||
return table.concat(result, "\n") | return table.concat(result, "\n") | ||
end | |||
-- Strip HTML tags | |||
local function stripHTML(text) | |||
if not text then return "" end | |||
-- Remove HTML comments | |||
text = text:gsub("<!%-%-.-%-%-?>", "") | |||
-- Remove script and style tags and their content | |||
text = text:gsub("<[Ss][Cc][Rr][Ii][Pp][Tt][^>]*>.-</[Ss][Cc][Rr][Ii][Pp][Tt]>", "") | |||
text = text:gsub("<[Ss][Tt][Yy][Ll][Ee][^>]*>.-</[Ss][Tt][Yy][Ll][Ee]>", "") | |||
-- Remove iframe tags and content | |||
text = text:gsub("<[Ii][Ff][Rr][Aa][Mm][Ee][^>]*>.-</[Ii][Ff][Rr][Aa][Mm][Ee]>", "") | |||
-- Remove self-closing HTML tags (like <br/>, <img/>, etc.) | |||
text = text:gsub("<[^>]+/>", "") | |||
-- Remove opening and closing HTML tags | |||
text = text:gsub("<[^>]+>", "") | |||
return text | |||
end | |||
-- Handle escape sequences | |||
local function handleEscapeSequences(text) | |||
if not text then return "" end | |||
-- Convert common escape sequences | |||
text = text:gsub("\\n", "\n") | |||
text = text:gsub("\\r", "\r") | |||
text = text:gsub("\\t", "\t") | |||
text = text:gsub("\\\"", "\"") | |||
text = text:gsub("\\'", "'") | |||
text = text:gsub("\\\\", "\\") | |||
-- Handle unicode escape sequences (\u0000) | |||
text = text:gsub("\\u(%x%x%x%x)", function(hex) | |||
local num = tonumber(hex, 16) | |||
if num then | |||
return string.char(num) | |||
else | |||
return "\\u" .. hex | |||
end | |||
end) | |||
return text | |||
end | |||
-- Clean up extra whitespace and normalize line breaks | |||
local function normalizeWhitespace(text) | |||
if not text then return "" end | |||
-- Convert different line break styles to consistent \n | |||
text = text:gsub("\r\n", "\n") | |||
text = text:gsub("\r", "\n") | |||
-- Remove excessive blank lines (more than 2 consecutive) | |||
text = text:gsub("\n\n\n+", "\n\n") | |||
-- Trim leading and trailing whitespace | |||
text = text:match("^%s*(.-)%s*$") | |||
return text | |||
end | end | ||
Line 262: | Line 327: | ||
function p.convert(frame) | function p.convert(frame) | ||
local markdown = frame.args[1] or frame:getParent().args[1] or "" | local markdown = frame.args[1] or frame:getParent().args[1] or "" | ||
local stripHtml = frame.args.striphtml or frame:getParent().args.striphtml or "true" | |||
local handleEscapes = frame.args.escapes or frame:getParent().args.escapes or "true" | |||
-- Apply pre-processing | |||
local wikitext = markdown | |||
-- Strip HTML if requested (default: true) | |||
if stripHtml ~= "false" then | |||
wikitext = stripHTML(wikitext) | |||
end | |||
-- Handle escape sequences if requested (default: true) | |||
if handleEscapes ~= "false" then | |||
wikitext = handleEscapeSequences(wikitext) | |||
end | |||
-- Normalize whitespace | |||
wikitext = normalizeWhitespace(wikitext) | |||
-- Apply conversions in order | -- Apply conversions in order | ||
-- Code blocks first (to protect code from other conversions) | -- Code blocks first (to protect code from other conversions) | ||
Line 292: | Line 374: | ||
return wikitext | return wikitext | ||
end | |||
-- Function to just clean/strip content without markdown conversion | |||
function p.clean(frame) | |||
local text = frame.args[1] or frame:getParent().args[1] or "" | |||
local stripHtml = frame.args.striphtml or frame:getParent().args.striphtml or "true" | |||
local handleEscapes = frame.args.escapes or frame:getParent().args.escapes or "true" | |||
local cleaned = text | |||
-- Strip HTML if requested (default: true) | |||
if stripHtml ~= "false" then | |||
cleaned = stripHTML(cleaned) | |||
end | |||
-- Handle escape sequences if requested (default: true) | |||
if handleEscapes ~= "false" then | |||
cleaned = handleEscapeSequences(cleaned) | |||
end | |||
-- Normalize whitespace | |||
cleaned = normalizeWhitespace(cleaned) | |||
return cleaned | |||
end | |||
-- Function to strip only HTML | |||
function p.stripHTML(frame) | |||
local text = frame.args[1] or frame:getParent().args[1] or "" | |||
return stripHTML(text) | |||
end | |||
-- Function to handle only escape sequences | |||
function p.handleEscapes(frame) | |||
local text = frame.args[1] or frame:getParent().args[1] or "" | |||
return handleEscapeSequences(text) | |||
end | end | ||
Revision as of 14:57, 24 July 2025
Documentation for this module may be created at Module:MarkdownToWikitext/doc
-- Table of exported functions; it is returned at the end of the module.
local p = {}
-- Neutralise characters that have special meaning in wikitext.
-- MediaWiki has no backslash escape, so prefixing a character with "\" leaves
-- the markup active. Each of [ ] { } | = is therefore replaced by its numeric
-- HTML character reference, which renders as the character itself.
--   text: string to escape (nil is accepted)
--   returns: the escaped string, or "" when text is nil
local function escapeWikitext(text)
    if not text then return "" end
    local references = {
        ["["] = "&#91;",
        ["]"] = "&#93;",
        ["{"] = "&#123;",
        ["}"] = "&#125;",
        ["|"] = "&#124;",
        ["="] = "&#61;",
    }
    -- Parentheses drop gsub's second result (the substitution count).
    return (text:gsub("[%[%]{}|=]", references))
end
-- Convert ATX headers ("# Title") to wikitext headings ("== Title ==").
-- Every line is examined on its own; a pattern anchored with ^...$ would only
-- ever see the start and end of the whole text.
-- A header needs whitespace after the hashes, so "#hashtag" is left alone.
-- An optional closing run of hashes ("# Title ##") is dropped.
-- "#" maps to "==" because "=" is the page-title level; the result is capped
-- at six "=" since wikitext has no deeper heading.
--   text: markdown source (nil is accepted)
--   returns: text with header lines rewritten, or "" when text is nil
local function convertHeaders(text)
    if not text then return "" end
    local out = {}
    -- The appended "\n" lets the pattern capture each line exactly once.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local hashes, content = line:match("^(#+)%s+(.-)%s*$")
        if hashes then
            -- A closing sequence only counts when preceded by whitespace,
            -- so "# C#" keeps its "#".
            content = content:gsub("%s+#+$", "")
            if content:find("^#+$") then
                content = ""
            end
            if content ~= "" then
                local marker = string.rep("=", math.min(#hashes + 1, 6))
                line = marker .. " " .. content .. " " .. marker
            end
        end
        out[#out + 1] = line
    end
    return table.concat(out, "\n")
end
-- Convert markdown strong/emphasis to wikitext bold (''') and italic ('').
-- Rules applied:
--   * a delimiter run never spans a line break (wikitext quotes do not either);
--   * the opening delimiter must be followed by a non-space character, so a
--     bullet such as "* item" is not mistaken for the start of emphasis;
--   * the closing delimiter of * and _ emphasis must follow a non-space;
--   * "_" and "__" only count at word boundaries, so snake_case_names and
--     URLs containing underscores are left intact.
-- No pattern consumes the characters around the delimiters, so adjacent
-- emphases ("*a* *b*") are both converted.
--   text: markdown source (nil is accepted)
--   returns: converted text, or "" when text is nil
local function convertEmphasis(text)
    if not text then return "" end

    -- Strong: **bold** and __bold__
    text = text:gsub("%*%*([^%s][^\n]-)%*%*", "'''%1'''")
    text = text:gsub("%f[%w_]__([^%s][^\n]-)__%f[^%w_]", "'''%1'''")

    -- Emphasis: *italic*. The multi-character form runs first; the
    -- single-character form ("*a*") needs its own pattern because the first
    -- and last character of the content are then the same character.
    text = text:gsub("%*([^%*%s][^%*\n]-[^%*%s])%*", "''%1''")
    text = text:gsub("%*([^%*%s])%*", "''%1''")

    -- Emphasis: _italic_, at word boundaries only
    text = text:gsub("%f[%w_]_([^_%s][^_\n]-[^_%s])_%f[^%w_]", "''%1''")
    text = text:gsub("%f[%w_]_([^_%s])_%f[^%w_]", "''%1''")

    return text
end
-- Convert markdown links to wikitext external links.
--   [label](url)          -> [url label]
--   [label](url "title")  -> [url label]   (the title is dropped)
--   <http://url>          -> [http://url]
-- Image syntax (![alt](src)) is deliberately left untouched so that
-- convertImages can still recognise it afterwards.
-- The label may not contain square brackets; this stops an unrelated
-- "[note]" earlier in the text from being merged into a later link.
--   text: markdown source (nil is accepted)
--   returns: converted text, or "" when text is nil
local function convertLinks(text)
    if not text then return "" end

    text = text:gsub("(!?)%[([^%[%]]*)%]%(([^%)]*)%)", function(bang, label, target)
        if bang == "!" then
            -- Returning nil keeps the original match: this is an image.
            return nil
        end
        -- Keep only the URL part of `url "title"`.
        local url = target:match("^%s*(%S*)")
        if label == "" then
            return "[" .. url .. "]"
        end
        return "[" .. url .. " " .. label .. "]"
    end)

    -- Convert <url> autolinks
    text = text:gsub("<(https?://[^>]+)>", "[%1]")
    return text
end
-- Rewrite markdown image syntax, an exclamation mark followed by [alt](src),
-- as a wikitext thumbnail: [[File:src|thumb|alt]].
local function convertImages(text)
    local converted = text:gsub("!%[(.-)%]%((.-)%)", function(alt, src)
        return "[[File:" .. src .. "|thumb|" .. alt .. "]]"
    end)
    return converted
end
-- Convert markdown code to wikitext.
--   ```lang ... ```   -> <syntaxhighlight lang="lang"> ... </syntaxhighlight>
--   ``` ... ```       -> <pre> ... </pre>
--   indented block    -> <pre> ... </pre>   (lines indented by 4+ spaces)
--   `inline`          -> <code>inline</code>
-- Each converted block is swapped for a placeholder until the end, so that a
-- later step in this function cannot touch code that was already converted
-- (for example indented lines or backticks inside a fenced block).
-- An indented block must start after a blank line (or at the start of the
-- text); consecutive indented lines are merged into a single <pre>. This keeps
-- indented list continuations from being treated as code.
--   text: markdown source (nil is accepted)
--   returns: converted text, or "" when text is nil
local function convertCode(text)
    if not text then return "" end

    -- DEL (\127) delimits placeholders; remove stray ones from the input so
    -- they cannot be confused with ours.
    text = text:gsub("\127", "")

    local stash = {}
    local function protect(block)
        stash[#stash + 1] = block
        return "\127CODE" .. #stash .. "\127"
    end

    -- Fenced code blocks (```lang\ncode\n```)
    text = text:gsub("```([^\n]*)\n(.-)\n```", function(info, code)
        -- Only the first word of the info string names the language.
        local lang = info:match("^%s*(%S*)")
        if lang ~= "" then
            return protect("<syntaxhighlight lang=\"" .. lang .. "\">\n" .. code .. "\n</syntaxhighlight>")
        end
        return protect("<pre>\n" .. code .. "\n</pre>")
    end)

    -- Indented code blocks, handled line by line
    local out = {}
    local block = nil
    local function flush()
        if block then
            out[#out + 1] = protect("<pre>" .. table.concat(block, "\n") .. "</pre>")
            block = nil
        end
    end
    local previousBlank = true
    -- The appended "\n" lets the pattern capture each line exactly once.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local code = line:match("^    (.*%S.*)$")
        if code and (block or previousBlank) then
            block = block or {}
            block[#block + 1] = code
        else
            flush()
            out[#out + 1] = line
        end
        previousBlank = not line:find("%S")
    end
    flush()
    text = table.concat(out, "\n")

    -- Inline code (`code`)
    text = text:gsub("`([^`]+)`", "<code>%1</code>")

    -- Put the protected blocks back. A block can contain an earlier
    -- placeholder (an indented fence), hence the recursion.
    local function restore(s)
        return (s:gsub("\127CODE(%d+)\127", function(index)
            local saved = stash[tonumber(index)]
            return saved and restore(saved)
        end))
    end
    return restore(text)
end
-- Convert markdown lists to wikitext lists.
--   "- item", "* item", "+ item"  -> "* item"
--   "1. item"                     -> "# item"
-- Every two characters of leading whitespace add one nesting level.
-- A whitespace-only line directly inside a list is emitted as an empty line.
-- A marker with no content after it (e.g. "1. ") is not a list item and is
-- passed through unchanged.
--   text: markdown source
--   returns: text with list lines rewritten
local function convertLists(text)
    local result = {}
    local inList = false

    -- The appended "\n" lets the pattern capture each line exactly once;
    -- gmatch("[^\n]*") yields an extra empty match after every line on
    -- Lua 5.1-5.3, which doubled all line breaks.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local marker = "*"
        local indent, content = line:match("^(%s*)[-*+]%s+(.+)$")
        if not indent then
            marker = "#"
            indent, content = line:match("^(%s*)%d+%.%s+(.+)$")
        end

        if indent then
            local level = math.floor(#indent / 2) + 1
            result[#result + 1] = string.rep(marker, level) .. " " .. content
            inList = true
        elseif inList and line:match("^%s*$") then
            -- Blank line inside a list: keep the list open
            result[#result + 1] = ""
        else
            inList = false
            result[#result + 1] = line
        end
    end

    return table.concat(result, "\n")
end
-- Convert runs of markdown quote lines ("> text") to <blockquote> blocks.
-- Consecutive quote lines are collected and emitted between a
-- "<blockquote>" line and a "</blockquote>" line, with the leading ">" and
-- the whitespace after it removed.
--   text: markdown source
--   returns: text with quote runs wrapped in <blockquote> tags
local function convertBlockquotes(text)
    local result = {}
    local quoteLines = {}

    -- Emit the collected quote lines, if any, as one block.
    local function flushQuote()
        if #quoteLines == 0 then
            return
        end
        result[#result + 1] = "<blockquote>"
        for _, quoteLine in ipairs(quoteLines) do
            result[#result + 1] = quoteLine
        end
        result[#result + 1] = "</blockquote>"
        quoteLines = {}
    end

    -- The appended "\n" lets the pattern capture each line exactly once;
    -- gmatch("[^\n]*") yields an extra empty match after every line on
    -- Lua 5.1-5.3, which split every multi-line quote apart.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local content = line:match("^>%s*(.*)$")
        if content then
            quoteLines[#quoteLines + 1] = content
        else
            flushQuote()
            result[#result + 1] = line
        end
    end
    -- Handle a blockquote that runs to the end of the text
    flushQuote()

    return table.concat(result, "\n")
end
-- Remove HTML markup from text.
-- Removed: comments, <script>/<style>/<iframe> elements together with their
-- content, declarations and processing instructions (<!DOCTYPE ...>,
-- <?xml ...?>), and all opening, closing and self-closing tags.
-- Kept on purpose:
--   * "<" that does not start a tag name, e.g. "1 < 2 and 3 > 2";
--   * markdown autolinks such as <https://example.org>, which convertLinks
--     turns into wikitext links.
--   text: input string (nil is accepted)
--   returns: text without HTML markup, or "" when text is nil
local function stripHTML(text)
    if not text then return "" end

    -- Remove HTML comments. The terminator must be the full "-->"; accepting
    -- "->" would end a comment early at an arrow inside it.
    text = text:gsub("<!%-%-.-%-%->", "")

    -- Remove script, style and iframe elements including their content
    text = text:gsub("<[Ss][Cc][Rr][Ii][Pp][Tt][^>]*>.-</[Ss][Cc][Rr][Ii][Pp][Tt]>", "")
    text = text:gsub("<[Ss][Tt][Yy][Ll][Ee][^>]*>.-</[Ss][Tt][Yy][Ll][Ee]>", "")
    text = text:gsub("<[Ii][Ff][Rr][Aa][Mm][Ee][^>]*>.-</[Ii][Ff][Rr][Aa][Mm][Ee]>", "")

    -- Remove declarations and processing instructions
    text = text:gsub("<[!?]%a[^>]*>", "")

    -- Remove tags. A tag name starts with a letter, which also covers
    -- self-closing tags like <br/>.
    text = text:gsub("</?%a[^>]*>", function(tag)
        if tag:find("^<https?://") then
            -- Returning nil keeps the match: this is an autolink, not a tag.
            return nil
        end
        return ""
    end)

    return text
end
-- Decode backslash escape sequences in a single left-to-right pass.
-- Recognised: \n \r \t \" \' \\ and \uXXXX (four hex digits).
-- A single pass is required so that an escaped backslash is consumed before
-- the character after it is looked at: the input \\n means "backslash, n",
-- not "backslash, newline".
-- \uXXXX is encoded as UTF-8. A high/low surrogate pair (\uD83D\uDE00) is
-- combined into one code point; a lone surrogate has no UTF-8 form and is
-- left as written. Unknown escapes are left as written too.
--   text: input string (nil is accepted)
--   returns: decoded string, or "" when text is nil
local function handleEscapeSequences(text)
    if not text then return "" end

    local SIMPLE = {
        n = "\n",
        r = "\r",
        t = "\t",
        ['"'] = '"',
        ["'"] = "'",
        ["\\"] = "\\",
    }

    -- Encode one Unicode code point (0..0x10FFFF) as UTF-8 bytes.
    local function utf8Encode(cp)
        local char, floor = string.char, math.floor
        if cp < 0x80 then
            return char(cp)
        elseif cp < 0x800 then
            return char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40)
        elseif cp < 0x10000 then
            return char(0xE0 + floor(cp / 0x1000),
                        0x80 + floor(cp / 0x40) % 0x40,
                        0x80 + cp % 0x40)
        end
        return char(0xF0 + floor(cp / 0x40000),
                    0x80 + floor(cp / 0x1000) % 0x40,
                    0x80 + floor(cp / 0x40) % 0x40,
                    0x80 + cp % 0x40)
    end

    local out = {}
    local pos = 1
    while true do
        local slash = text:find("\\", pos, true)
        if not slash then
            out[#out + 1] = text:sub(pos)
            break
        end
        out[#out + 1] = text:sub(pos, slash - 1)

        local kind = text:sub(slash + 1, slash + 1)
        local hex = kind == "u" and text:match("^%x%x%x%x", slash + 2)
        if hex then
            local cp = tonumber(hex, 16)
            pos = slash + 6
            if cp >= 0xD800 and cp <= 0xDBFF then
                -- High surrogate: combine with a directly following low one.
                local low = text:match("^\\u([Dd][C-Fc-f]%x%x)", pos)
                if low then
                    cp = 0x10000 + (cp - 0xD800) * 0x400 + (tonumber(low, 16) - 0xDC00)
                    pos = pos + 6
                end
            end
            if cp >= 0xD800 and cp <= 0xDFFF then
                out[#out + 1] = "\\u" .. hex
            else
                out[#out + 1] = utf8Encode(cp)
            end
        elseif SIMPLE[kind] then
            out[#out + 1] = SIMPLE[kind]
            pos = slash + 2
        else
            -- Not an escape we know: keep the backslash, rescan what follows.
            out[#out + 1] = "\\"
            pos = slash + 1
        end
    end

    return table.concat(out)
end
-- Normalise line endings and blank-line runs, then trim the text.
-- CRLF and lone CR both become LF, three or more consecutive line breaks
-- collapse to two, and leading/trailing whitespace is removed.
-- Returns "" when text is nil.
local function normalizeWhitespace(text)
    if not text then
        return ""
    end
    -- A CR with an optional LF after it is one line break
    local unified = text:gsub("\r\n?", "\n")
    -- At most one empty line between paragraphs
    local squeezed = unified:gsub("\n\n\n+", "\n\n")
    return squeezed:match("^%s*(.-)%s*$")
end
-- Convert markdown horizontal rules to the wikitext rule "----".
-- A rule is a line made of three or more of the SAME marker (-, * or _),
-- optionally separated or surrounded by spaces and tabs: "---", "* * *",
-- "_____". Two-character lines such as "--" and mixed markers are not rules.
-- Lines are examined one at a time, so blank lines around a rule are kept and
-- rules on consecutive lines are all converted.
--   text: markdown source (nil is accepted)
--   returns: text with rule lines replaced, or "" when text is nil
local function convertHorizontalRules(text)
    if not text then return "" end
    local out = {}
    -- The appended "\n" lets the pattern capture each line exactly once.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        local compact = line:gsub("[ \t]", "")
        if compact:find("^%-%-%-+$")
            or compact:find("^%*%*%*+$")
            or compact:find("^___+$") then
            line = "----"
        end
        out[#out + 1] = line
    end
    return table.concat(out, "\n")
end
-- Split one markdown table row into its trimmed cells.
-- One optional leading and one optional trailing pipe are ignored, so
-- "| a | b |" and "a | b" both give { "a", "b" }.
local function splitTableRow(line)
    local row = line:match("^%s*(.-)%s*$")
    row = row:gsub("^|", ""):gsub("|$", "")
    local cells = {}
    -- The appended "|" terminates the last cell so every cell is captured.
    for cell in (row .. "|"):gmatch("([^|]*)|") do
        cells[#cells + 1] = cell:match("^%s*(.-)%s*$")
    end
    return cells
end

-- True for a delimiter row such as "|---|:--:|": only dashes, colons, pipes
-- and whitespace, with at least one dash.
local function isSeparatorRow(line)
    return line:find("-", 1, true) ~= nil and line:find("^[%s|:%-]+$") ~= nil
end

-- Helper function to convert markdown table rows to wikitable lines.
--   lines: array of row strings; the first is the header row, and a
--          delimiter row in second position is skipped
--   returns: array of wikitext lines, from "{| ..." to "|}"
local function convertMarkdownTable(lines)
    local result = { "{| class=\"wikitable\"" }
    local headerProcessed = false
    for index, line in ipairs(lines) do
        if not (index == 2 and isSeparatorRow(line)) then
            -- Header cells use "!", data cells use "|"
            local prefix = headerProcessed and "| " or "! "
            result[#result + 1] = "|-"
            for _, cell in ipairs(splitTableRow(line)) do
                result[#result + 1] = prefix .. cell
            end
            headerProcessed = true
        end
    end
    result[#result + 1] = "|}"
    return result
end

-- Convert markdown pipe tables to wikitables.
-- A table starts at a line containing "|" that is immediately followed by a
-- delimiter row with the same number of cells; it continues for as long as
-- the following lines contain "|". Requiring the delimiter row keeps ordinary
-- lines that merely contain a pipe (e.g. "[[File:a.png|thumb|alt]]") from
-- being turned into tables.
--   text: markdown source (nil is accepted)
--   returns: converted text, or "" when text is nil
local function convertTables(text)
    if not text then return "" end

    local lines = {}
    -- The appended "\n" lets the pattern capture each line exactly once.
    for line in (text .. "\n"):gmatch("([^\n]*)\n") do
        lines[#lines + 1] = line
    end

    local result = {}
    local i = 1
    while i <= #lines do
        local header, separator = lines[i], lines[i + 1]
        local startsTable = separator ~= nil
            and header:find("|", 1, true) ~= nil
            and isSeparatorRow(separator)
            and #splitTableRow(header) == #splitTableRow(separator)

        if startsTable then
            local rows = { header, separator }
            i = i + 2
            while lines[i] and lines[i]:find("|", 1, true) do
                rows[#rows + 1] = lines[i]
                i = i + 1
            end
            for _, tableLine in ipairs(convertMarkdownTable(rows)) do
                result[#result + 1] = tableLine
            end
        else
            result[#result + 1] = header
            i = i + 1
        end
    end

    return table.concat(result, "\n")
end
-- Main entry point: turn markdown into wikitext.
-- Arguments are read from the frame first and from its parent frame second:
--   1          the markdown source (default "")
--   striphtml  anything other than "false" strips HTML first (default "true")
--   escapes    anything other than "false" decodes escape sequences first
--              (default "true")
function p.convert(frame)
    -- Look a key up on this frame, then on the parent, then use the fallback.
    local function argument(key, fallback)
        return frame.args[key] or frame:getParent().args[key] or fallback
    end

    local wikitext = argument(1, "")
    local stripHtml = argument("striphtml", "true")
    local handleEscapes = argument("escapes", "true")

    -- Optional pre-processing
    if stripHtml ~= "false" then
        wikitext = stripHTML(wikitext)
    end
    if handleEscapes ~= "false" then
        wikitext = handleEscapeSequences(wikitext)
    end

    -- Fixed sequence of steps; each receives the previous step's output.
    local steps = {
        normalizeWhitespace,
        convertCode,
        convertHeaders,
        convertEmphasis,
        convertLinks,
        convertImages,
        convertLists,
        convertBlockquotes,
        convertTables,
        convertHorizontalRules,
    }
    for _, step in ipairs(steps) do
        wikitext = step(wikitext)
    end

    return wikitext
end
-- Clean text without any markdown conversion.
-- Takes the same arguments as p.convert (1, striphtml, escapes): optionally
-- strips HTML, optionally decodes escape sequences, then normalises
-- whitespace.
function p.clean(frame)
    -- Look a key up on this frame, then on the parent, then use the fallback.
    local function argument(key, fallback)
        return frame.args[key] or frame:getParent().args[key] or fallback
    end

    local cleaned = argument(1, "")
    local stripHtml = argument("striphtml", "true")
    local handleEscapes = argument("escapes", "true")

    if stripHtml ~= "false" then
        cleaned = stripHTML(cleaned)
    end
    if handleEscapes ~= "false" then
        cleaned = handleEscapeSequences(cleaned)
    end

    return normalizeWhitespace(cleaned)
end
-- Exported wrapper: strip HTML from the first argument, taken from the frame
-- or, failing that, from its parent frame.
function p.stripHTML(frame)
    local source = frame.args[1]
    if not source then
        source = frame:getParent().args[1] or ""
    end
    return stripHTML(source)
end
-- Exported wrapper: decode escape sequences in the first argument, taken from
-- the frame or, failing that, from its parent frame.
function p.handleEscapes(frame)
    local source = frame.args[1]
    if not source then
        source = frame:getParent().args[1] or ""
    end
    return handleEscapeSequences(source)
end
-- Exported wrapper: convert only headers in the first argument, taken from
-- the frame or, failing that, from its parent frame.
function p.convertHeaders(frame)
    local source = frame.args[1]
    if not source then
        source = frame:getParent().args[1] or ""
    end
    return convertHeaders(source)
end
-- Exported wrapper: convert only bold/italic markup in the first argument,
-- taken from the frame or, failing that, from its parent frame.
function p.convertEmphasis(frame)
    local source = frame.args[1]
    if not source then
        source = frame:getParent().args[1] or ""
    end
    return convertEmphasis(source)
end
-- Exported wrapper: convert only links in the first argument, taken from the
-- frame or, failing that, from its parent frame.
function p.convertLinks(frame)
    local source = frame.args[1]
    if not source then
        source = frame:getParent().args[1] or ""
    end
    return convertLinks(source)
end
-- Exported wrapper: convert only lists in the first argument, taken from the
-- frame or, failing that, from its parent frame.
function p.convertLists(frame)
    local source = frame.args[1]
    if not source then
        source = frame:getParent().args[1] or ""
    end
    return convertLists(source)
end
-- Return the table of exported functions as the module's value.
return p