Toggle menu
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

Module:MarkdownToWikitext: Difference between revisions

From Vault Hunters Official Wiki
No edit summary
No edit summary
Line 11: Line 11:
local function trim(s)
local function trim(s)
     return s:match('^%s*(.-)%s*$')
     return s:match('^%s*(.-)%s*$')
end
-- Helper function to escape characters that have special meaning in wikitext patterns.
-- This is crucial for safely inserting user-provided text (like URLs or link text)
-- into the replacement part of a gsub.
local function escapePattern(s)
    return s:gsub("([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1")
end
end


Line 25: Line 18:


-- Function to strip any existing HTML tags from the input to prevent
-- Function to strip any existing HTML tags from the input to prevent
-- conflicts and potential security issues.
-- conflicts and potential security issues. This version is more robust.
local function stripHtml(text)
local function stripHtml(text)
     -- This removes anything that looks like an HTML tag.
     -- This pattern is more specific and safer than a simple greedy match.
     return text:gsub('<[^>]->', '')
    -- It looks for what looks like a valid tag name after the '<'.
     text = text:gsub('<%/?%w+[^>]*>', '')
    -- A second pass to remove HTML comments, which the above pattern won't catch.
    text = text:gsub('<!%-%-.-%-%->', '')
    return text
end
end


-- Converts Markdown-style headers to wikitext headers.
-- Converts Markdown-style headers to wikitext headers.
-- This function is designed to work on a single line of text.
-- e.g., ## Header 2 -> == Header 2 ==
-- e.g., ## Header 2 -> == Header 2 ==
local function convertHeaders(text)
local function convertHeaders(text)
Line 37: Line 35:
         local h_md = string.rep('#', i)
         local h_md = string.rep('#', i)
         local h_wiki = string.rep('=', i)
         local h_wiki = string.rep('=', i)
         -- Pattern: ^(#{i})%s+(.-)%s*$
         -- The pattern uses '^' to ensure it only matches at the start of the string (line).
        -- Captures the start of a line, the hash marks, and the header text.
         text = text:gsub('^' .. h_md .. '%s+(.-)%s*$', h_wiki .. ' %1 ' .. h_wiki)
         text = text:gsub('^' .. h_md .. '%s+(.-)%s*$', h_wiki .. ' %1 ' .. h_wiki)
     end
     end
Line 48: Line 45:
local function convertEmphasis(text)
local function convertEmphasis(text)
     -- Bold: **text** or __text__ -> '''text'''
     -- Bold: **text** or __text__ -> '''text'''
     text = text:gsub('%*%*([^\n]+)%*%*', "'''%1'''")
    -- The pattern '[^%*]+' is used to handle multiple bold sections on one line correctly.
     text = text:gsub('__([^\n]+)__', "'''%1'''")
     text = text:gsub('%*%*([^%*]+)%*%*', "'''%1'''")
     text = text:gsub('__([^_]+)__', "'''%1'''")
     -- Italic: *text* or _text_ -> ''text''
     -- Italic: *text* or _text_ -> ''text''
     text = text:gsub('%*([^\n]+)%*', "''%1''")
     text = text:gsub('%*([^%*]+)%*', "''%1''")
     text = text:gsub('_([^\n]+)_', "''%1''")
     text = text:gsub('_([^_]+)_', "''%1''")
     return text
     return text
end
end
Line 78: Line 76:
     -- Lazily captures text between backticks.
     -- Lazily captures text between backticks.
     return text:gsub('`([^`\n]+)`', '<code>%1</code>')
     return text:gsub('`([^`\n]+)`', '<code>%1</code>')
end
-- Converts Markdown lists to wikitext lists.
-- Handles both ordered (1.) and unordered (*, -) lists.
local function convertLists(text)
    local lines = {}
    for line in text:gmatch("([^\n]*)\n?") do
        -- Unordered lists
        line = line:gsub('^%s*[%*%-]%s+', '* ')
        -- Ordered lists
        line = line:gsub('^%s*%d+%.%s+', '# ')
        table.insert(lines, line)
    end
    return table.concat(lines, '\n')
end
end


Line 112: Line 96:
     local inputText = frame.args[1] or frame.args.source or ''
     local inputText = frame.args[1] or frame.args.source or ''


     -- 1. Initial sanitization: Strip any pre-existing HTML.
     -- 1. First, strip any HTML-like tags from the entire block of text.
     local wikitext = stripHtml(inputText)
     local text = stripHtml(inputText)
 
    local processedLines = {}
    -- 2. Process the text line by line
    for line in text:gmatch("([^\n]*)\n?") do
        local currentLine = trim(line)
 
        -- Apply transformations. The order of these operations is important.


    -- 2. Block-level conversions (Headers, Lists)
        -- A. Block Transformations (Headers, Lists)
    wikitext = convertHeaders(wikitext)
        currentLine = convertHeaders(currentLine)
    wikitext = convertLists(wikitext)
        -- List conversion is handled directly here.
        currentLine = currentLine:gsub('^%s*[%*%-]%s+', '* ') -- Unordered lists
        currentLine = currentLine:gsub('^%s*%d+%.%s+', '# ')  -- Ordered lists


    -- 3. Inline conversions (Emphasis, Links, Images, Code)
        -- B. Inline Transformations (Emphasis, Links, Code)
    -- These are processed line by line to avoid issues with multi-line matching.
         currentLine = convertEmphasis(currentLine)
    local lines = {}
         currentLine = convertLinksAndImages(currentLine)
    for line in wikitext:gmatch("([^\n]*)\n?") do
         currentLine = convertInlineCode(currentLine)
         line = convertEmphasis(line)
 
         line = convertLinksAndImages(line)
         table.insert(processedLines, currentLine)
         line = convertInlineCode(line)
         table.insert(lines, line)
     end
     end
    wikitext = table.concat(lines, '\n')


     -- 4. Handle paragraph breaks. In Markdown, two newlines create a paragraph.
     -- 3. Re-assemble the processed lines into a single string.
    -- In wikitext, a blank line does the same. Here, we'll replace double
     local wikitext = table.concat(processedLines, '\n')
    -- newlines with a single one to preserve paragraph structure, and then
    -- we can use <br> for single newlines if needed, but standard wikitext
    -- paragraph handling should suffice. Let's ensure paragraphs are separated.
    wikitext = wikitext:gsub('\n\n+', '\n\n') -- Normalize multiple blank lines to one
   
    -- 5. Handle single line breaks within a paragraph. Markdown requires two spaces
    -- at the end of a line for a <br>. We will convert single newlines that are not
    -- part of a list or header into <br /> tags.
     local finalLines = {}
    local inList = false
    for line in wikitext:gmatch("([^\n]*)\n?") do
        local isListOrHeader = line:match('^[%*#=]')
        if #line > 0 and not isListOrHeader and not finalLines[#finalLines]:match('^$') and #finalLines > 0 then
            -- This is a line break within a paragraph, not a new paragraph.
            -- However, standard wikitext handles paragraphs separated by blank lines well.
            -- Let's stick to that and not add <br> tags unless necessary, as it can
            -- lead to messy formatting. The double newline replacement above should be sufficient.
        end
        table.insert(finalLines, line)
    end
    wikitext = table.concat(finalLines, '\n')


    -- 4. Handle paragraph breaks for proper wikitext rendering.
    -- Replace two or more consecutive newlines with just two (a single blank line).
    wikitext = wikitext:gsub('\n\n+', '\n\n')


     return wikitext
     return wikitext

Revision as of 17:29, 24 July 2025

Documentation for this module may be created at Module:MarkdownToWikitext/doc

-- =================================================================
-- Module:MarkdownToWikitext
-- Description: Converts a string of Markdown text into wikitext.
-- Author: Gemini
-- License: CC BY-SA 4.0
-- =================================================================

-- Export table: public entry points are attached to this table, and the
-- table itself is returned at the end of the module.
local p = {}

-- Strips leading and trailing whitespace from a string.
-- @param s  the string to trim
-- @return   `s` without its surrounding whitespace ('' when `s` is blank)
local function trim(s)
    -- The lazy capture keeps everything between the outer whitespace runs.
    local core = s:match('^%s*(.-)%s*$')
    return core
end

-- =================================================================
-- Core Conversion Functions
-- =================================================================

-- Removes HTML-like tags and HTML comments from the input to prevent
-- conflicts and potential security issues.
--
-- A single substitution pass is not enough: gsub does not rescan the text it
-- has just produced, so input such as '<<b>script>' would turn into
-- '<script>' after one pass. Both substitutions are therefore repeated until
-- the text stops changing. Every productive pass shortens the text, so the
-- loop always terminates.
--
-- @param text  the raw input string
-- @return      the input with all tags and comments removed
local function stripHtml(text)
    local previous
    repeat
        previous = text
        -- Anything that looks like an opening or closing tag: '<', an optional
        -- '/', a tag name, then everything up to the next '>'.
        text = text:gsub('<%/?%w+[^>]*>', '')
        -- HTML comments, which the tag pattern above does not catch.
        text = text:gsub('<!%-%-.-%-%->', '')
    until text == previous
    return text
end

-- Turns a Markdown ATX header line into the equivalent wikitext header.
-- Expects a single line of text; anything that is not a header is returned
-- unchanged.
-- e.g., ## Header 2 -> == Header 2 ==
-- @param text  one line of Markdown
-- @return      the line, rewritten as a wikitext header when it was one
local function convertHeaders(text)
    local level = 6
    -- Try the deepest level first so '###' is never mistaken for '#'.
    while level >= 1 do
        local hashes = ('#'):rep(level)
        local bars = ('='):rep(level)
        -- '^' anchors the match to the start of the string (line).
        local rewritten, hits = text:gsub('^' .. hashes .. '%s+(.-)%s*$', bars .. ' %1 ' .. bars)
        if hits > 0 then
            -- The result now starts with '=', so no shallower level can match.
            return rewritten
        end
        level = level - 1
    end
    return text
end

-- Converts bold and italic syntax.
-- Bold is handled before italic so that the double delimiters are consumed
-- first and are not misread as two single ones.
--
-- Underscore delimiters only count when they are not inside a word: the
-- opening '_' must not follow a letter, digit or '_', and the closing '_'
-- must not precede one. Without this, identifiers such as snake_case_name
-- and URLs containing underscores would be italicised. Asterisk delimiters
-- have no such restriction.
--
-- Limitation: emphasis that contains its own delimiter character (for
-- example italic nested inside bold using the same character) is not matched.
--
-- @param text  one line of text
-- @return      the line with Markdown emphasis replaced by wikitext quotes
local function convertEmphasis(text)
    -- Bold: **text** or __text__ -> '''text'''
    -- '[^%*]+' / '[^_]+' stop at the next delimiter, so several bold sections
    -- on one line are converted separately.
    text = text:gsub('%*%*([^%*]+)%*%*', "'''%1'''")
    -- %f[%w_] : previous character is not a word character (or start of string).
    -- %f[^%w_]: next character is not a word character (or end of string).
    text = text:gsub('%f[%w_]__([^_]+)__%f[^%w_]', "'''%1'''")
    -- Italic: *text* or _text_ -> ''text''
    text = text:gsub('%*([^%*]+)%*', "''%1''")
    text = text:gsub('%f[%w_]_([^_]+)_%f[^%w_]', "''%1''")
    return text
end

-- Rewrites Markdown images and links as wikitext.
-- e.g., [text](url) -> [url text]
-- e.g., ![alt](url) -> [[File:url|alt]]
-- @param text  one line of text
-- @return      the line with images and links converted
local function convertLinksAndImages(text)
    -- Alt text may be empty; it cannot contain ']' or a newline.
    -- The target is the shortest run up to the next ')'.
    local imagePattern = '!%[([^\n%]]*)%]%((.-)%)'
    -- Same shape without the leading '!', and the link text must be non-empty.
    local linkPattern = '%[([^\n%]]+)%]%((.-)%)'

    -- Images go first: an image is a link with a '!' in front, so the link
    -- pattern would otherwise consume it.
    local withImages = text:gsub(imagePattern, '[[File:%2|%1]]')
    local withLinks = withImages:gsub(linkPattern, '[%2 %1]')
    return withLinks
end

-- Converts inline code spans.
-- e.g., `code` -> <code>code</code>
-- The span must be non-empty and may not contain a backtick or a newline.
-- @param text  one line of text
-- @return      the converted line, as a single value
local function convertInlineCode(text)
    -- gsub returns (string, count). Keep only the string so callers never
    -- receive the substitution count as a stray second return value.
    local converted = text:gsub('`([^`\n]+)`', '<code>%1</code>')
    return converted
end

-- =================================================================
-- Main Public Function
-- =================================================================

---
-- The main function to be called from wikitext.
-- It takes a string of Markdown text and converts it to wikitext.
--
-- Usage in wikitext (the module page is Module:MarkdownToWikitext):
-- {{#invoke:MarkdownToWikitext|markdown|source=...}}
-- or when used with ExternalData:
-- {{#invoke:MarkdownToWikitext|markdown|1={{{1}}}}}
--
-- Each line is trimmed before conversion, so leading indentation (and with
-- it any list nesting) is not preserved.
--
-- @param frame  object with an `args` table; the Markdown is read from
--               args[1], falling back to args.source, then to ''
-- @return       the converted wikitext as a single string
---
function p.markdown(frame)
    -- Get the input text. It can be passed as the first argument (e.g., from ExternalData)
    -- or from a named 'source' argument.
    local inputText = frame.args[1] or frame.args.source or ''

    -- 1. First, strip any HTML-like tags from the entire block of text.
    local text = stripHtml(inputText)

    local processedLines = {}
    -- 2. Process the text line by line.
    -- Appending a newline and requiring one in the pattern yields each line
    -- exactly once. The pattern "([^\n]*)\n?" can also match the empty string
    -- at the end of the input, which adds a spurious trailing line on some
    -- Lua versions (5.1 among them) and not on others.
    for line in (text .. '\n'):gmatch('([^\n]*)\n') do
        -- A. Block transformations (headers, lists).
        local currentLine = convertHeaders(trim(line))

        -- Split a list marker off before the inline pass. Otherwise the '*'
        -- of an unordered item is taken for an emphasis delimiter, and
        -- '* *italic* item' is mangled. The line is already trimmed, so the
        -- marker, if any, is at the very start.
        local listPrefix = ''
        local item = currentLine:match('^[%*%-]%s+(.*)$')  -- unordered: '* ' or '- '
        if item then
            listPrefix = '* '
        else
            item = currentLine:match('^%d+%.%s+(.*)$')     -- ordered: '1. '
            if item then
                listPrefix = '# '
            end
        end
        if item then
            currentLine = item
        end

        -- B. Inline transformations (emphasis, links, code). Order matters:
        -- each step only keeps the first return value of the converter.
        currentLine = convertEmphasis(currentLine)
        currentLine = convertLinksAndImages(currentLine)
        currentLine = convertInlineCode(currentLine)

        processedLines[#processedLines + 1] = listPrefix .. currentLine
    end

    -- 3. Re-assemble the processed lines into a single string.
    local wikitext = table.concat(processedLines, '\n')

    -- 4. Handle paragraph breaks for proper wikitext rendering.
    -- Replace two or more consecutive newlines with just two (a single blank line).
    -- Only the string is kept; gsub's match count must not be returned.
    wikitext = wikitext:gsub('\n\n+', '\n\n')

    return wikitext
end

-- Hand the export table (with `markdown` attached) back to whoever loads this module.
return p