Toggle menu
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

Module:MarkdownToWikitext: Difference between revisions

From Vault Hunters Official Wiki
mNo edit summary
mNo edit summary
 
(11 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- =================================================================
-- Module:Markdown
-- Description: Converts a string of Markdown text into wikitext.
-- Author: Gemini
-- License: CC BY-SA 4.0
-- =================================================================
local p = {}
local p = {}


-- Escape special wikitext characters
-- Helper function to trim whitespace from a string
local function escapeWikitext(text)
local function trim(s)
     if not text then return "" end
     return s:match('^%s*(.-)%s*$')
    -- Escape wikitext special characters
    text = text:gsub("([%[%]{}|=])", "\\%1")
    return text
end
end


-- Convert headers
-- =================================================================
local function convertHeaders(text)
-- Core Conversion Functions
    -- Convert ATX headers (# Header)
-- =================================================================
    text = text:gsub("^(#+)%s*(.-)%s*#*$", function(hashes, content)
        local level = #hashes
        if level > 6 then level = 6 end
        local equals = string.rep("=", level + 1)
        return equals .. " " .. content .. " " .. equals
    end)
   
    return text
end


-- Convert emphasis and strong
-- Function to strip any existing HTML tags from the input.
local function convertEmphasis(text)
-- This version uses a non-greedy match to be safer.
     -- Convert **bold** and __bold__
local function stripHtml(text)
    text = text:gsub("%*%*(.-)%*%*", "'''%1'''")
     -- The non-greedy pattern '<.->' matches a '<', then the fewest
    text = text:gsub("__(.-)__", "'''%1'''")
     -- characters possible (.-) until it finds the next '>', preventing
   
     -- it from consuming large chunks of text if a tag is malformed.
     -- Convert *italic* and _italic_
     text = text:gsub('<.->', '')
    text = text:gsub("([^%*])%*([^%*].-[^%*])%*([^%*])", "%1''%2''%3")
    text = text:gsub("^%*([^%*].-[^%*])%*([^%*])", "''%1''%2")
     text = text:gsub("([^%*])%*([^%*].-[^%*])%*$", "%1''%2''")
    text = text:gsub("^%*([^%*].-[^%*])%*$", "''%1''")
   
    text = text:gsub("([^_])_([^_].-[^_])_([^_])", "%1''%2''%3")
     text = text:gsub("^_([^_].-[^_])_([^_])", "''%1''%2")
    text = text:gsub("([^_])_([^_].-[^_])_$", "%1''%2''")
    text = text:gsub("^_([^_].-[^_])_$", "''%1''")
   
     return text
     return text
end
end


-- Convert links
local function decodeUnicode(text)
local function convertLinks(text)
return text:gsub("\\u(%x%x%x%x)", function(hex)
    -- Convert [text](url) links
local code = tonumber(hex, 16)
    text = text:gsub("%[(.-)%]%((.-)%)", "[%2 %1]")
if code and code < 256 then
   
return string.char(code)
    -- Convert <url> autolinks
else
    text = text:gsub("<(https?://[^>]+)>", "[%1]")
return "" -- skip or replace with placeholder if outside ASCII range
   
end
    return text
end)
end
end


-- Convert images
-- Removes backslashes that escape markdown punctuation.
local function convertImages(text)
local function handleEscapes(text)
     -- Convert ![alt](url) images
     -- e.g., turns `\+` into `+` and `\*` into `*`.
     text = text:gsub("!%[(.-)%]%((.-)%)", "[[File:%2|thumb|%1]]")
    -- Wikitext generally doesn't require these characters to be escaped in prose.
   
     return text:gsub('\\([!%#%*+%-%._`%[%]()])', '%1')
    return text
end
end


-- Convert code blocks and inline code
-- Converts Markdown-style headers to wikitext headers.
local function convertCode(text)
-- This function is designed to work on a single line of text.
     -- Convert fenced code blocks (```lang\ncode\n```) - must be on their own lines
local function convertHeaders(text)
    text = text:gsub("\n```([^\n]*)\n(.-)\n```\n", function(lang, code)
     for i = 6, 1, -1 do
         if lang and lang ~= "" then
        local h_md = string.rep('#', i)
            return "\n<syntaxhighlight lang=\"" .. lang .. "\">\n" .. code .. "\n</syntaxhighlight>\n"
         local h_wiki = string.rep('=', i)
         else
         -- The pattern uses '^' to ensure it only matches at the start of the string (line).
            return "\n<pre>\n" .. code .. "\n</pre>\n"
         text = text:gsub('^' .. h_md .. '%s+(.-)%s*$', h_wiki .. ' %1 ' .. h_wiki)
        end
    end)
   
    -- Handle fenced code blocks at start/end of text
    text = text:gsub("^```([^\n]*)\n(.-)\n```\n", function(lang, code)
        if lang and lang ~= "" then
            return "<syntaxhighlight lang=\"" .. lang .. "\">\n" .. code .. "\n</syntaxhighlight>\n"
         else
            return "<pre>\n" .. code .. "\n</pre>\n"
        end
    end)
   
    text = text:gsub("\n```([^\n]*)\n(.-)\n```$", function(lang, code)
        if lang and lang ~= "" then
            return "\n<syntaxhighlight lang=\"" .. lang .. "\">\n" .. code .. "\n</syntaxhighlight>"
        else
            return "\n<pre>\n" .. code .. "\n</pre>"
        end
    end)
   
    -- Convert indented code blocks (4+ spaces at start of line, multiple consecutive lines)
    local lines = {}
    for line in text:gmatch("[^\n]*") do
        table.insert(lines, line)
    end
   
    local result = {}
    local codeBlock = {}
    local inCodeBlock = false
   
    for i, line in ipairs(lines) do
        local hasLeadingSpaces = line:match("^    ")
        local isEmpty = line:match("^%s*$")
       
        if hasLeadingSpaces then
            -- This line starts with 4+ spaces
            if not inCodeBlock then
                inCodeBlock = true
            end
            table.insert(codeBlock, line:gsub("^    ", ""))
        elseif isEmpty and inCodeBlock then
            -- Empty line in code block - include it
            table.insert(codeBlock, "")
        else
            -- Non-code line
            if inCodeBlock then
                -- End the code block
                if #codeBlock > 0 then
                    table.insert(result, "<pre>")
                    for _, codeLine in ipairs(codeBlock) do
                        table.insert(result, codeLine)
                    end
                    table.insert(result, "</pre>")
                end
                codeBlock = {}
                inCodeBlock = false
            end
            table.insert(result, line)
        end
    end
   
    -- Handle code block at end
    if inCodeBlock and #codeBlock > 0 then
        table.insert(result, "<pre>")
        for _, codeLine in ipairs(codeBlock) do
            table.insert(result, codeLine)
        end
        table.insert(result, "</pre>")
     end
     end
   
    text = table.concat(result, "\n")
   
    -- Convert inline code (`code`) - but not if it spans multiple lines
    text = text:gsub("`([^`\n]+)`", "<code>%1</code>")
   
     return text
     return text
end
end


-- Convert lists
-- Converts bold and italic syntax.
local function convertLists(text)
-- Handles nested cases by doing bold first.
     local lines = {}
local function convertEmphasis(text)
     for line in text:gmatch("[^\n]*") do
     -- Bold: **text** or __text__ -> '''text'''
        table.insert(lines, line)
     text = text:gsub('%*%*([^\n%*]-)%*%*', "'''%1'''")
    end
    text = text:gsub('__([^\n_]-)__', "'''%1'''")
   
    -- Italic: *text* or _text_ -> ''text''
    local result = {}
    text = text:gsub('%*([^\n%*]-)%*', "''%1''")
    local inList = false
    text = text:gsub('_([^\n_]-)_', "''%1''")
    local listType = nil
     return text
   
    for i, line in ipairs(lines) do
        local trimmed = line:match("^%s*(.-)%s*$")
       
        -- Unordered list
        local bulletMatch = line:match("^(%s*)[-*+]%s+(.+)$")
        if bulletMatch then
            local indent, content = line:match("^(%s*)[-*+]%s+(.+)$")
            local level = math.floor(#indent / 2) + 1
            local bullets = string.rep("*", level)
            table.insert(result, bullets .. " " .. content)
            inList = true
            listType = "unordered"
        -- Ordered list
        elseif line:match("^%s*%d+%.%s+") then
            local indent, content = line:match("^(%s*)%d+%.%s+(.+)$")
            local level = math.floor(#indent / 2) + 1
            local bullets = string.rep("#", level)
            table.insert(result, bullets .. " " .. content)
            inList = true
            listType = "ordered"
        else
            if inList and trimmed == "" then
                -- Empty line in list - continue list
                table.insert(result, "")
            else
                inList = false
                listType = nil
                table.insert(result, line)
            end
        end
    end
   
     return table.concat(result, "\n")
end
end


-- Convert blockquotes
-- Converts Markdown links and images.
local function convertBlockquotes(text)
local function convertLinksAndImages(text)
     local lines = {}
     -- Images must be processed first: ![alt](url) -> [[File:url|alt]]
    for line in text:gmatch("[^\n]*") do
     text = text:gsub('!%[([^\n%]]*)%]%((.-)%)', '[[File:%2|%1]]')
        table.insert(lines, line)
     -- Links: [text](url) -> [url text]
     end
     text = text:gsub('%[([^\n%]]+)%]%((.-)%)', '[%2 %1]')
   
     return text
    local result = {}
    local inQuote = false
    local quoteLines = {}
   
    for i, line in ipairs(lines) do
        if line:match("^>%s*") then
            local content = line:gsub("^>%s*", "")
            table.insert(quoteLines, content)
            inQuote = true
        else
            if inQuote then
                -- End of blockquote
                table.insert(result, "<blockquote>")
                for _, quoteLine in ipairs(quoteLines) do
                    table.insert(result, quoteLine)
                end
                table.insert(result, "</blockquote>")
                quoteLines = {}
                inQuote = false
            end
            table.insert(result, line)
        end
    end
   
     -- Handle blockquote at end of text
     if inQuote then
        table.insert(result, "<blockquote>")
        for _, quoteLine in ipairs(quoteLines) do
            table.insert(result, quoteLine)
        end
        table.insert(result, "</blockquote>")
    end
   
     return table.concat(result, "\n")
end
end


-- Strip HTML tags
-- Converts inline code blocks: `code` -> <code>code</code>
local function stripHTML(text)
local function convertInlineCode(text)
    if not text then return "" end
     return text:gsub('`([^`\n]+)`', '<code>%1</code>')
   
    -- Remove HTML comments
    text = text:gsub("<!%-%-.-%-%-?>", "")
   
    -- Remove script and style tags and their content
    text = text:gsub("<[Ss][Cc][Rr][Ii][Pp][Tt][^>]*>.-</[Ss][Cc][Rr][Ii][Pp][Tt]>", "")
    text = text:gsub("<[Ss][Tt][Yy][Ll][Ee][^>]*>.-</[Ss][Tt][Yy][Ll][Ee]>", "")
   
    -- Remove iframe tags and content
    text = text:gsub("<[Ii][Ff][Rr][Aa][Mm][Ee][^>]*>.-</[Ii][Ff][Rr][Aa][Mm][Ee]>", "")
   
    -- Remove paragraph tags entirely (no replacement)
    text = text:gsub("<[Pp]>", "")
     text = text:gsub("</[Pp]>", "")
   
    -- Remove line break tags entirely
    text = text:gsub("<[Bb][Rr][^>]*>", "")
   
    -- Remove self-closing HTML tags (like <img/>, etc.)
    text = text:gsub("<[^>]+/>", "")
   
    -- Remove remaining opening and closing HTML tags
    text = text:gsub("<[^>]+>", "")
   
    return text
end
end


-- Handle escape sequences
-- =================================================================
local function handleEscapeSequences(text)
-- Main Public Function
    if not text then return "" end
-- =================================================================
   
    -- Remove literal \n sequences entirely (don't convert to line breaks)
    text = text:gsub("\\n", "")
   
    -- Convert other common escape sequences
    text = text:gsub("\\r", "")
    text = text:gsub("\\t", " ")  -- Convert tabs to single space
    text = text:gsub("\\\"", "\"")
    text = text:gsub("\\'", "'")
    text = text:gsub("\\\\", "\\")
   
    -- Handle unicode escape sequences (\u0000)
    text = text:gsub("\\u(%x%x%x%x)", function(hex)
        local num = tonumber(hex, 16)
        if num then
            return string.char(num)
        else
            return "\\u" .. hex
        end
    end)
   
    return text
end


-- Clean up extra whitespace and normalize line breaks
---
local function normalizeWhitespace(text)
-- The main function to be called from wikitext.
    if not text then return "" end
-- It takes a string of Markdown text and converts it to wikitext.
   
--
    -- Convert different line break styles to consistent \n
-- Usage in wikitext:
    text = text:gsub("\r\n", "\n")
-- {{#invoke:Markdown|markdown|1={{{1}}}}}
    text = text:gsub("\r", "\n")
---
   
function p.markdown(frame)
    -- Remove excessive blank lines (more than 1 consecutive empty line)
     local inputText = frame.args[1] or frame.args.source or ''
    text = text:gsub("\n\n\n+", "\n\n")
   
    -- Remove trailing spaces on lines
    text = text:gsub(" +\n", "\n")
      
    -- Trim leading and trailing whitespace from the entire text
    text = text:match("^%s*(.-)%s*$")
      
      
     return text
     -- New 1 - Convert unicode back to tags.
end
    local decoded = decodeUnicode(inputText)


-- Convert horizontal rules
     -- 1. First, strip any HTML-like tags from the entire block of text.
local function convertHorizontalRules(text)
     local text = stripHtml(decoded)
     -- Convert --- or *** or ___ to ----
    text = text:gsub("\n%s*[-*_][-*_%s]*[-*_]%s*\n", "\n----\n")
    text = text:gsub("^%s*[-*_][-*_%s]*[-*_]%s*\n", "----\n")
     text = text:gsub("\n%s*[-*_][-*_%s]*[-*_]%s*$", "\n----")
   
    return text
end


-- Convert tables
     local processedLines = {}
local function convertTables(text)
    -- 2. Process the text line by line
     local lines = {}
     for line in text:gmatch("([^\n]*)\n?") do
     for line in text:gmatch("[^\n]*") do
         local currentLine = trim(line)
         table.insert(lines, line)
    end
   
    local result = {}
    local inTable = false
    local tableLines = {}
   
    for i, line in ipairs(lines) do
        if line:match("|") and not line:match("^%s*$") then
            table.insert(tableLines, line)
            inTable = true
        else
            if inTable then
                -- Process the table
                local wikiTable = convertMarkdownTable(tableLines)
                for _, tableLine in ipairs(wikiTable) do
                    table.insert(result, tableLine)
                end
                tableLines = {}
                inTable = false
            end
            table.insert(result, line)
        end
    end
   
    -- Handle table at end
    if inTable then
        local wikiTable = convertMarkdownTable(tableLines)
        for _, tableLine in ipairs(wikiTable) do
            table.insert(result, tableLine)
        end
    end
   
    return table.concat(result, "\n")
end


-- Helper function to convert markdown table to wikitable
        -- Apply transformations. The order of these operations is important.
function convertMarkdownTable(lines)
        currentLine = handleEscapes(currentLine)
    local result = {"{| class=\"wikitable\""}
         currentLine = convertHeaders(currentLine)
    local headerProcessed = false
   
    for i, line in ipairs(lines) do
         local trimmed = line:match("^%s*(.-)%s*$")
          
          
         -- Skip separator lines (|---|---|)
         -- List conversion
         if not trimmed:match("^|?[-:%s|]+|?$") then
         currentLine = currentLine:gsub('^%s*[%*%-]%s+', '* ') -- Unordered lists
            local cells = {}
        currentLine = currentLine:gsub('^%s*%d+%.%s+', '# ')   -- Ordered lists
            for cell in trimmed:gmatch("|([^|]*)") do
                local cleanCell = cell:match("^%s*(.-)%s*$")
                table.insert(cells, cleanCell)
            end
           
            if not headerProcessed then
                -- First row is header
                table.insert(result, "|-")
                for j, cell in ipairs(cells) do
                    table.insert(result, "! " .. cell)
                end
                headerProcessed = true
            else
                -- Data row
                table.insert(result, "|-")
                for j, cell in ipairs(cells) do
                    table.insert(result, "| " .. cell)
                end
            end
        end
    end
   
    table.insert(result, "|}")
    return result
end


-- Main conversion function
         -- Inline Transformations
function p.convert(frame)
         currentLine = convertEmphasis(currentLine)
    local markdown = frame.args[1] or frame:getParent().args[1] or ""
        currentLine = convertLinksAndImages(currentLine)
    local stripHtml = frame.args.striphtml or frame:getParent().args.striphtml or "true"
        currentLine = convertInlineCode(currentLine)
    local handleEscapes = frame.args.escapes or frame:getParent().args.escapes or "true"
   
    -- Apply pre-processing
    local wikitext = markdown
   
    -- Strip HTML if requested (default: true)
    if stripHtml ~= "false" then
         wikitext = stripHTML(wikitext)
    end
   
    -- Handle escape sequences if requested (default: true)
    if handleEscapes ~= "false" then
         wikitext = handleEscapeSequences(wikitext)
    end
   
    -- Normalize whitespace
    wikitext = normalizeWhitespace(wikitext)
   
    -- Apply conversions in order
   
    -- Code blocks first (to protect code from other conversions)
    wikitext = convertCode(wikitext)
   
    -- Headers
    wikitext = convertHeaders(wikitext)
   
    -- Emphasis and strong
    wikitext = convertEmphasis(wikitext)
   
    -- Links and images
    wikitext = convertLinks(wikitext)
    wikitext = convertImages(wikitext)
   
    -- Lists
    wikitext = convertLists(wikitext)
   
    -- Blockquotes
    wikitext = convertBlockquotes(wikitext)
   
    -- Tables
    wikitext = convertTables(wikitext)
   
    -- Horizontal rules
    wikitext = convertHorizontalRules(wikitext)
   
    return wikitext
end


-- Function to just clean/strip content without markdown conversion
        table.insert(processedLines, currentLine)
function p.clean(frame)
    local text = frame.args[1] or frame:getParent().args[1] or ""
    local stripHtml = frame.args.striphtml or frame:getParent().args.striphtml or "true"
    local handleEscapes = frame.args.escapes or frame:getParent().args.escapes or "true"
   
    local cleaned = text
   
    -- Strip HTML if requested (default: true)
    if stripHtml ~= "false" then
        cleaned = stripHTML(cleaned)
    end
   
    -- Handle escape sequences if requested (default: true)
    if handleEscapes ~= "false" then
        cleaned = handleEscapeSequences(cleaned)
     end
     end
   
    -- Normalize whitespace
    cleaned = normalizeWhitespace(cleaned)
   
    return cleaned
end
-- Function to strip only HTML
function p.stripHTML(frame)
    local text = frame.args[1] or frame:getParent().args[1] or ""
    return stripHTML(text)
end


-- Function to handle only escape sequences
    -- 3. Re-assemble the processed lines into a single string.
function p.handleEscapes(frame)
     local wikitext = table.concat(processedLines, '\n')
     local text = frame.args[1] or frame:getParent().args[1] or ""
    return handleEscapeSequences(text)
end


-- Function to convert just specific elements
    -- 4. Handle paragraph breaks for proper wikitext rendering.
function p.convertHeaders(frame)
     -- Replace two or more consecutive newlines with just two (a single blank line).
     local text = frame.args[1] or frame:getParent().args[1] or ""
     wikitext = wikitext:gsub('\n\n+', '\n\n')
     return convertHeaders(text)
end


function p.convertEmphasis(frame)
     return wikitext
    local text = frame.args[1] or frame:getParent().args[1] or ""
    return convertEmphasis(text)
end
 
function p.convertLinks(frame)
    local text = frame.args[1] or frame:getParent().args[1] or ""
    return convertLinks(text)
end
 
function p.convertLists(frame)
    local text = frame.args[1] or frame:getParent().args[1] or ""
     return convertLists(text)
end
end


return p
return p

Latest revision as of 17:57, 24 July 2025

Documentation for this module may be created at Module:MarkdownToWikitext/doc

-- =================================================================
-- Module:MarkdownToWikitext
-- Description: Converts a string of Markdown text into wikitext.
-- Author: Gemini
-- License: CC BY-SA 4.0
-- =================================================================

local p = {}

-- Returns `s` with leading and trailing whitespace removed.
-- A string that is empty or consists only of whitespace yields ''.
local function trim(s)
    -- Skip leading whitespace, then greedily capture up to the last
    -- non-whitespace character; no match means there was nothing to keep.
    local core = s:match('^%s*(.*%S)')
    if core == nil then
        return ''
    end
    return core
end

-- =================================================================
-- Core Conversion Functions
-- =================================================================

-- Removes every tag-like span from `text` and returns the result.
--
-- A span starts at a '<' and ends at the nearest following '>'. Because '.'
-- in a Lua pattern also matches newlines, a span may cross line boundaries,
-- and ordinary prose such as "a < b > c" is removed just like a real tag.
-- A '<' with no later '>' is left in place.
local function stripHtml(text)
    local TAG_LIKE = '<.->'
    -- Keep only the rewritten string; the match count from gsub is dropped.
    local stripped = text:gsub(TAG_LIKE, '')
    return stripped
end

-- Decodes JSON-style `\uXXXX` escapes in `text` and returns the decoded string.
--
-- Each escape is replaced by the UTF-8 encoding of its code point, so
-- `\u003c` becomes "<" and `\u00e9` becomes the two-byte sequence for "é".
-- A high/low surrogate pair (`\uD83D\uDE00`) is combined into one code point.
-- A surrogate that is not part of a valid pair cannot be encoded and is removed.
-- Only the string is returned (the gsub match count is not passed on).
local function decodeUnicode(text)
    local floor, char = math.floor, string.char

    -- Encodes one code point (0 .. 0x10FFFF) as a UTF-8 byte string.
    -- Uses only arithmetic so it also runs on Lua 5.1, which has no utf8 library.
    local function utf8Bytes(cp)
        if cp < 0x80 then
            return char(cp)
        elseif cp < 0x800 then
            return char(0xC0 + floor(cp / 0x40),
                        0x80 + cp % 0x40)
        elseif cp < 0x10000 then
            return char(0xE0 + floor(cp / 0x1000),
                        0x80 + floor(cp / 0x40) % 0x40,
                        0x80 + cp % 0x40)
        end
        return char(0xF0 + floor(cp / 0x40000),
                    0x80 + floor(cp / 0x1000) % 0x40,
                    0x80 + floor(cp / 0x40) % 0x40,
                    0x80 + cp % 0x40)
    end

    -- Surrogate pairs must be handled before single escapes, otherwise each
    -- half would be seen on its own and discarded.
    text = text:gsub('\\u([dD][89abAB]%x%x)\\u([dD][c-fC-F]%x%x)', function(high, low)
        local cp = 0x10000
            + (tonumber(high, 16) - 0xD800) * 0x400
            + (tonumber(low, 16) - 0xDC00)
        return utf8Bytes(cp)
    end)

    text = text:gsub('\\u(%x%x%x%x)', function(hex)
        local cp = tonumber(hex, 16)
        if cp >= 0xD800 and cp <= 0xDFFF then
            return '' -- unpaired surrogate: not representable in UTF-8
        end
        return utf8Bytes(cp)
    end)

    return text
end

-- Unescapes Markdown punctuation: a backslash followed by one of
--   ! # * + - . _ ` [ ] ( )
-- is replaced by that character alone (e.g. `\+` -> `+`, `\*` -> `*`).
-- Backslashes in front of any other character are left untouched.
-- Returns the rewritten string followed by the number of replacements made.
local function handleEscapes(text)
    local ESCAPED_PUNCTUATION = '\\([!%#%*+%-%._`%[%]()])'
    local KEEP_CHARACTER_ONLY = '%1'
    return text:gsub(ESCAPED_PUNCTUATION, KEEP_CHARACTER_ONLY)
end

-- Converts a Markdown ATX heading into a wikitext heading.
--
-- `text` is expected to be a single line. A line that starts with one to six
-- '#' characters followed by whitespace becomes the heading text wrapped in
-- the same number of '=' signs ("## Title" -> "== Title =="). An optional
-- closing run of '#' preceded by whitespace ("## Title ##") is dropped.
-- Any other input, including a run of seven or more '#', is returned unchanged.
local function convertHeaders(text)
    -- The pattern is anchored at both ends, so only a heading at the very
    -- start of the string matches and trailing whitespace is trimmed.
    local hashes, title = text:match('^(#+)%s+(.-)%s*$')
    if not hashes or #hashes > 6 then
        return text
    end

    -- Closing sequence: only stripped when separated from the title by
    -- whitespace, so a title such as "C#" keeps its '#'.
    title = title:gsub('%s+#+$', '')

    local marker = string.rep('=', #hashes)
    return marker .. ' ' .. title .. ' ' .. marker
end

-- Converts Markdown bold and italic markers to wikitext quotes.
--
--   **text** / __text__  ->  '''text'''
--   *text*   / _text_    ->  ''text''
--
-- Bold is converted before italic so that the double delimiters are not
-- consumed as two single ones. A span never crosses a newline and never
-- contains its own delimiter character.
--
-- To avoid false positives the emphasised text must begin and end with a
-- non-whitespace character, so a list bullet ("* item") or a spaced operator
-- ("2 * 3 * 4") is not treated as an opener. Underscore delimiters must also
-- not touch a letter, digit or underscore on the outside, which keeps
-- identifiers and URLs such as snake_case_name intact.
local function convertEmphasis(text)
    -- Each rule: delimiter pattern, the delimiter as written inside a
    -- character set, guard before the opener, guard after the closer,
    -- wikitext quote run.
    local rules = {
        { '%*%*', '%*', '',        '',          "'''" },
        { '__',   '_',  '%f[%w_]', '%f[^%w_]',  "'''" },
        { '%*',   '%*', '',        '',          "''"  },
        { '_',    '_',  '%f[%w_]', '%f[^%w_]',  "''"  },
    }

    for _, rule in ipairs(rules) do
        local delim, setChar = rule[1], rule[2]
        local before, after = rule[3], rule[4]
        local replacement = rule[5] .. '%1' .. rule[5]

        -- First/last character of the span: not whitespace, not the delimiter.
        local edge = '[^%s' .. setChar .. ']'
        -- Interior of the span: anything except a newline or the delimiter.
        local middle = '[^\n' .. setChar .. ']-'

        -- Spans of two or more characters, then single-character spans.
        text = text:gsub(before .. delim .. '(' .. edge .. middle .. edge .. ')' .. delim .. after, replacement)
        text = text:gsub(before .. delim .. '(' .. edge .. ')' .. delim .. after, replacement)
    end

    return text
end

-- Converts Markdown images and links to wikitext.
--
--   ![alt](url)   ->  [[File:url|alt]]
--   [text](url)   ->  [url text]
--
-- Images are converted first; otherwise the link rule would match the
-- "[alt](url)" part of an image and leave the '!' behind.
-- The destination is matched as a balanced pair of parentheses, so URLs that
-- themselves contain "(...)" are kept whole. An optional quoted title after
-- the URL (`(url "title")`) is discarded because neither wikitext form has a
-- place for it.
local function convertLinksAndImages(text)
    -- Takes the parenthesised destination, e.g. `(url "title")`, and returns
    -- just the URL part. Without a recognisable title the content between the
    -- parentheses is returned as is.
    local function destination(parenthesised)
        local inner = parenthesised:sub(2, -2)
        local url = inner:match('^%s*(%S+)%s+"[^"]*"%s*$')
            or inner:match("^%s*(%S+)%s+'[^']*'%s*$")
        return url or inner
    end

    -- Function replacements are used so that '%' in URLs or labels is never
    -- interpreted as a gsub replacement escape.
    text = text:gsub('!%[([^\n%]]*)%](%b())', function(alt, target)
        return '[[File:' .. destination(target) .. '|' .. alt .. ']]'
    end)
    text = text:gsub('%[([^\n%]]+)%](%b())', function(label, target)
        return '[' .. destination(target) .. ' ' .. label .. ']'
    end)

    return text
end

-- Wraps inline code spans in <code> tags: `code` -> <code>code</code>.
-- A span must be non-empty and may contain neither a backtick nor a newline.
-- Returns the rewritten string followed by the number of spans converted.
local function convertInlineCode(text)
    local function wrapInCodeTag(snippet)
        return '<code>' .. snippet .. '</code>'
    end
    return text:gsub('`([^`\n]+)`', wrapInCodeTag)
end

-- =================================================================
-- Main Public Function
-- =================================================================

---
-- The main function to be called from wikitext.
-- It takes a string of Markdown text and converts it to wikitext.
--
-- The Markdown is read from the first positional argument, falling back to
-- the named argument `source`; with neither present an empty string is
-- converted. The result is returned as a single wikitext string.
--
-- Usage in wikitext:
-- {{#invoke:MarkdownToWikitext|markdown|1={{{1}}}}}
---
function p.markdown(frame)
    local inputText = frame.args[1] or frame.args.source or ''

    -- Decode \uXXXX escapes first so that escaped tags such as
    -- \u003cb\u003e become real tags and are removed by the next step.
    local decoded = decodeUnicode(inputText)

    -- 1. Strip any HTML-like tags from the entire block of text.
    local text = stripHtml(decoded)

    -- Inline markup, applied to the content of a line only (never to the
    -- list marker). Emphasis runs before links and code, as before.
    local function convertInline(fragment)
        fragment = convertEmphasis(fragment)
        fragment = convertLinksAndImages(fragment)
        fragment = convertInlineCode(fragment)
        return fragment
    end

    local processedLines = {}
    -- 2. Process the text line by line. A newline is appended so that every
    -- line, including the last, is newline-terminated; this yields exactly
    -- one match per line on every Lua version (the previous pattern produced
    -- an extra empty match at the end of the input on Lua 5.1).
    for line in (text .. '\n'):gmatch('([^\n]*)\n') do
        local currentLine = trim(line)

        -- Block-level transformations. The order of these operations is important.
        currentLine = handleEscapes(currentLine)
        currentLine = convertHeaders(currentLine)

        -- List conversion. The marker is split off and re-attached after the
        -- inline pass; otherwise the emitted '*' bullet would be read as an
        -- emphasis opener by convertEmphasis.
        local prefix = ''
        local bulletItem = currentLine:match('^[%*%-+]%s+(.*)$')
        local numberedItem = currentLine:match('^%d+%.%s+(.*)$')
        if bulletItem then
            prefix, currentLine = '* ', bulletItem        -- Unordered lists (*, -, +)
        elseif numberedItem then
            prefix, currentLine = '# ', numberedItem      -- Ordered lists
        end

        processedLines[#processedLines + 1] = prefix .. convertInline(currentLine)
    end

    -- 3. Re-assemble the processed lines into a single string.
    local wikitext = table.concat(processedLines, '\n')

    -- 4. Handle paragraph breaks for proper wikitext rendering.
    -- Replace two or more consecutive newlines with just two (a single blank line).
    wikitext = wikitext:gsub('\n\n+', '\n\n')

    return wikitext
end

return p