More actions
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
-- ================================================================= | |||
-- Module:Markdown | |||
-- Description: Converts a string of Markdown text into wikitext. | |||
-- Author: Gemini | |||
-- License: CC BY-SA 4.0 | |||
-- ================================================================= | |||
local p = {} | local p = {} | ||
-- Helper to | -- Helper function to trim whitespace from a string | ||
local function | local function trim(s) | ||
return s:match('^%s*(.-)%s*$') | |||
end | |||
-- Helper function to escape characters that have special meaning in wikitext patterns. | |||
-- This is crucial for safely inserting user-provided text (like URLs or link text) | |||
-- into the replacement part of a gsub. | |||
local function escapePattern(s) | |||
return s:gsub("([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1") | |||
end | |||
-- ================================================================= | |||
-- Core Conversion Functions | |||
-- ================================================================= | |||
-- Function to strip any existing HTML tags from the input to prevent | |||
-- conflicts and potential security issues. | |||
local function stripHtml(text) | |||
-- This removes anything that looks like an HTML tag. | |||
return text:gsub('<[^>]->', '') | |||
end | end | ||
-- | -- Converts Markdown-style headers to wikitext headers. | ||
local function | -- e.g., ## Header 2 -> == Header 2 == | ||
local function convertHeaders(text) | |||
for i = 6, 1, -1 do | |||
local h_md = string.rep('#', i) | |||
local h_wiki = string.rep('=', i) | |||
-- Pattern: ^(#{i})%s+(.-)%s*$ | |||
-- Captures the start of a line, the hash marks, and the header text. | |||
text = text:gsub('^' .. h_md .. '%s+(.-)%s*$', h_wiki .. ' %1 ' .. h_wiki) | |||
end | |||
return text | |||
end | end | ||
-- Converts | -- Converts bold and italic syntax. | ||
local function | -- Handles nested cases by doing bold first. | ||
local function convertEmphasis(text) | |||
-- Bold: **text** or __text__ -> '''text''' | |||
text = text:gsub('%*%*([^\n]+)%*%*', "'''%1'''") | |||
text = text:gsub('__([^\n]+)__', "'''%1'''") | |||
-- Italic: *text* or _text_ -> ''text'' | |||
text = text:gsub('%*([^\n]+)%*', "''%1''") | |||
text = text:gsub('_([^\n]+)_', "''%1''") | |||
return text | |||
end | |||
-- Converts Markdown links and images. | |||
-- e.g., [text](url) -> [url text] | |||
-- e.g., ![alt](url) -> [[File:url|alt]] | |||
local function convertLinksAndImages(text) | |||
-- Images must be processed first, as they are a superset of the link syntax. | |||
-- Pattern: !%[(.-)%]%((.-)%) | |||
-- Captures alt text and URL for images. | |||
text = text:gsub('!%[([^\n%]]*)%]%((.-)%)', '[[File:%2|%1]]') | |||
-- Links | |||
-- Pattern: %[(.-)%]%((.-)%) | |||
-- Captures link text and URL. | |||
text = text:gsub('%[([^\n%]]+)%]%((.-)%)', '[%2 %1]') | |||
return text | |||
end | |||
-- Converts inline code blocks. | |||
-- e.g., `code` -> <code>code</code> | |||
local function convertInlineCode(text) | |||
-- Pattern: `(.-)` | |||
-- Lazily captures text between backticks. | |||
return text:gsub('`([^`\n]+)`', '<code>%1</code>') | |||
end | |||
-- Converts Markdown lists to wikitext lists. | |||
-- Handles both ordered (1.) and unordered (*, -) lists. | |||
local function convertLists(text) | |||
local lines = {} | |||
for line in text:gmatch("([^\n]*)\n?") do | |||
-- Unordered lists | |||
line = line:gsub('^%s*[%*%-]%s+', '* ') | |||
-- Ordered lists | |||
line = line:gsub('^%s*%d+%.%s+', '# ') | |||
table.insert(lines, line) | |||
end | |||
return table.concat(lines, '\n') | |||
end | end | ||
-- Main | -- ================================================================= | ||
function p. | -- Main Public Function | ||
-- ================================================================= | |||
--- | |||
-- The main function to be called from wikitext. | |||
-- It takes a string of Markdown text and converts it to wikitext. | |||
-- | |||
-- Usage in wikitext: | |||
-- {{#invoke:Markdown|markdown|source=...}} | |||
-- or when used with ExternalData: | |||
-- {{#invoke:Markdown|markdown|1={{{1}}}}} | |||
--- | |||
function p.markdown(frame) | |||
-- Get the input text. It can be passed as the first argument (e.g., from ExternalData) | |||
-- or from a named 'source' argument. | |||
local inputText = frame.args[1] or frame.args.source or '' | |||
-- 1. Initial sanitization: Strip any pre-existing HTML. | |||
local wikitext = stripHtml(inputText) | |||
-- 2. Block-level conversions (Headers, Lists) | |||
wikitext = convertHeaders(wikitext) | |||
wikitext = convertLists(wikitext) | |||
-- 3. Inline conversions (Emphasis, Links, Images, Code) | |||
-- These are processed line by line to avoid issues with multi-line matching. | |||
local lines = {} | |||
for line in wikitext:gmatch("([^\n]*)\n?") do | |||
line = convertEmphasis(line) | |||
line = convertLinksAndImages(line) | |||
line = convertInlineCode(line) | |||
table.insert(lines, line) | |||
end | |||
wikitext = table.concat(lines, '\n') | |||
-- 4. Handle paragraph breaks. In Markdown, two newlines create a paragraph. | |||
-- In wikitext, a blank line does the same. Here, we'll replace double | |||
-- newlines with a single one to preserve paragraph structure, and then | |||
-- we can use <br> for single newlines if needed, but standard wikitext | |||
-- paragraph handling should suffice. Let's ensure paragraphs are separated. | |||
wikitext = wikitext:gsub('\n\n+', '\n\n') -- Normalize multiple blank lines to one | |||
-- 5. Handle single line breaks within a paragraph. Markdown requires two spaces | |||
-- at the end of a line for a <br>. We will convert single newlines that are not | |||
-- part of a list or header into <br /> tags. | |||
local finalLines = {} | |||
local inList = false | |||
for line in wikitext:gmatch("([^\n]*)\n?") do | |||
local isListOrHeader = line:match('^[%*#=]') | |||
if #line > 0 and not isListOrHeader and not finalLines[#finalLines]:match('^$') and #finalLines > 0 then | |||
-- This is a line break within a paragraph, not a new paragraph. | |||
-- However, standard wikitext handles paragraphs separated by blank lines well. | |||
-- Let's stick to that and not add <br> tags unless necessary, as it can | |||
-- lead to messy formatting. The double newline replacement above should be sufficient. | |||
end | |||
table.insert(finalLines, line) | |||
end | |||
wikitext = table.concat(finalLines, '\n') | |||
return wikitext | |||
end | end | ||
return p | return p |
Revision as of 17:19, 24 July 2025
Documentation for this module may be created at Module:MarkdownToWikitext/doc
-- =================================================================
-- Module:Markdown
-- Description: Converts a string of Markdown text into wikitext.
-- Author: Gemini
-- License: CC BY-SA 4.0
-- =================================================================
-- Export table: public entry points are attached to `p`, which is returned
-- at the end of the file.
local p = {}
-- Strip leading and trailing whitespace from `s`.
-- Whitespace in the interior of the string is left untouched.
local function trim(s)
    local core = s:match('^%s*(.-)%s*$')
    return core
end
-- Escape every character that is magic in a Lua pattern
-- ( ) . % + - * ? [ ^ $ ] by prefixing it with '%', so that `s` can be
-- embedded literally in the *pattern* argument of find/match/gsub.
--
-- NOTE(review): in a gsub *replacement* string only '%' is special; the
-- output of this function is meant for patterns, not replacements.
--
-- @param s string to escape
-- @return the escaped string (exactly one value; gsub's match count is dropped
--         so the result is safe to pass straight into another call)
local function escapePattern(s)
    local escaped = s:gsub("([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1")
    return escaped
end
-- =================================================================
-- Core Conversion Functions
-- =================================================================
-- Remove anything that looks like an HTML tag from `text`.
-- A "tag" here is a '<', the shortest run of non-'>' characters, and a '>'.
-- This is a purely textual filter, so prose such as "1 < 2 and 3 > 2" also
-- loses the span between the two angle brackets.
--
-- @param text input string
-- @return the text with tag-like spans deleted (exactly one value; gsub's
--         replacement count is not leaked to the caller)
local function stripHtml(text)
    local stripped = text:gsub('<[^>]->', '')
    return stripped
end
-- Convert Markdown ATX headers to wikitext headers, line by line.
-- e.g., "## Header 2" -> "== Header 2 =="
--
-- A header is a line that starts with one to six '#' characters followed by
-- whitespace; trailing whitespace after the title is dropped. Lines with
-- seven or more '#' or with no space after the hashes are left unchanged.
--
-- The text is split into lines first because '^' and '$' in a Lua pattern
-- anchor to the whole subject string, not to individual lines; applying the
-- pattern to the full text would only ever see a header on the first line.
--
-- @param text possibly multi-line Markdown string
-- @return the text with header lines rewritten; line structure is preserved
local function convertHeaders(text)
    local out = {}
    -- Appending '\n' lets every line (including the last) end in a newline,
    -- so the iterator yields each line exactly once.
    for line in (text .. '\n'):gmatch('([^\n]*)\n') do
        local hashes, title = line:match('^(#+)%s+(.-)%s*$')
        if hashes and #hashes <= 6 then
            local marks = string.rep('=', #hashes)
            line = marks .. ' ' .. title .. ' ' .. marks
        end
        out[#out + 1] = line
    end
    return table.concat(out, '\n')
end
-- Convert Markdown bold and italic markers to wikitext quotes.
--   **text** or __text__ -> '''text'''
--   *text*   or _text_   -> ''text''
-- Bold is handled before italic so that "**" is not consumed as two "*".
--
-- Matching rules:
--   * Spans are matched lazily, so several spans on one line stay separate.
--   * The opening marker must be followed by a non-space character, which
--     keeps a leading "* " list marker and expressions like "2 * 3" intact.
--   * Underscore markers must sit on a word boundary (frontier patterns),
--     so identifiers such as snake_case_name are not italicised.
--
-- @param text a string; spans never cross a newline
-- @return the converted string
local function convertEmphasis(text)
    -- Bold
    text = text:gsub('%*%*(%S[^\n]-)%*%*', "'''%1'''")
    text = text:gsub('%f[%w_]__(%S[^\n]-)__%f[^%w_]', "'''%1'''")
    -- Italic
    text = text:gsub('%*(%S[^\n]-)%*', "''%1''")
    text = text:gsub('%f[%w_]_(%S[^\n]-)_%f[^%w_]', "''%1''")
    return text
end
-- Convert Markdown links and images to wikitext.
--   [text](url)  -> [url text]
--   ![alt](url)  -> [[File:url|alt]]
--
-- Images are processed first because their syntax is the link syntax with
-- a leading '!'. The "(...)" part is matched with a balanced-parentheses
-- pattern so that URLs such as ".../Lua_(language)" are kept whole.
-- Surrounding whitespace and an optional quoted title ('url "Title"') are
-- removed from the target. Matches whose target is empty or spans a newline
-- are left as written.
--
-- @param text a string (the caller in this file passes one line at a time)
-- @return the converted string
local function convertLinksAndImages(text)
    -- Reduce a "(target)" group to its bare URL, or nil if unusable.
    local function urlOf(group)
        local inner = group:sub(2, -2)
        if inner:find('\n', 1, true) then
            return nil
        end
        local url = inner:match('^%s*(.-)%s*$')
        url = url:match('^(%S+)%s+".*"$') or url:match("^(%S+)%s+'.*'$") or url
        if url == '' then
            return nil
        end
        return url
    end

    -- Returning nil from a gsub callback keeps the original match unchanged.
    text = text:gsub('!%[([^\n%]]*)%](%b())', function(alt, group)
        local url = urlOf(group)
        if not url then
            return nil
        end
        return '[[File:' .. url .. '|' .. alt .. ']]'
    end)

    text = text:gsub('%[([^\n%]]+)%](%b())', function(label, group)
        local url = urlOf(group)
        if not url then
            return nil
        end
        return '[' .. url .. ' ' .. label .. ']'
    end)

    return text
end
-- Convert inline code spans to <code> elements.
-- e.g., `code` -> <code>code</code>
--
-- A span is a backtick, one or more characters that are neither a backtick
-- nor a newline, and a closing backtick. Empty spans ("``") are not matched.
--
-- @param text input string
-- @return the converted string (exactly one value; gsub's replacement count
--         is dropped so the result can be passed directly to other calls)
local function convertInlineCode(text)
    local converted = text:gsub('`([^`\n]+)`', '<code>%1</code>')
    return converted
end
-- Convert Markdown list markers to wikitext list markers, line by line.
--   unordered: optional indent, '*' or '-', whitespace  -> "* "
--   ordered:   optional indent, digits, '.', whitespace -> "# "
-- Leading indentation is discarded, so nested items become top-level items.
--
-- Lines are split with a pattern that requires the terminating newline
-- (one is appended to the input first). The optional-newline form
-- "([^\n]*)\n?" can also match the empty string at the end of the subject,
-- and whether that extra match is reported differs between Lua versions,
-- which made a trailing newline appear or disappear.
--
-- @param text possibly multi-line string
-- @return the text with list markers rewritten; a trailing newline in the
--         input is preserved and none is added
local function convertLists(text)
    local lines = {}
    for line in (text .. '\n'):gmatch('([^\n]*)\n') do
        -- Unordered lists
        line = line:gsub('^%s*[%*%-]%s+', '* ')
        -- Ordered lists
        line = line:gsub('^%s*%d+%.%s+', '# ')
        lines[#lines + 1] = line
    end
    return table.concat(lines, '\n')
end
-- =================================================================
-- Main Public Function
-- =================================================================
---
-- The main function to be called from wikitext.
-- It takes a string of Markdown text and converts it to wikitext.
--
-- Usage in wikitext (use the name of the page this module is saved under):
-- {{#invoke:Markdown|markdown|source=...}}
-- or when used with ExternalData:
-- {{#invoke:Markdown|markdown|1={{{1}}}}}
--
-- @param frame object whose `args` table holds the input, either as the
--        first positional argument or as the named `source` argument
-- @return the converted wikitext string
---
function p.markdown(frame)
    -- Prefer the first positional argument; fall back to `source` when it is
    -- missing or empty (an empty string is truthy in Lua, so a plain `or`
    -- would never reach the fallback for an empty positional argument).
    local args = frame.args
    local inputText = args[1]
    if inputText == nil or inputText == '' then
        inputText = args.source or ''
    end

    -- 1. Initial sanitization: strip any pre-existing HTML.
    local wikitext = stripHtml(inputText)

    -- 2. Block-level conversions (headers, lists).
    wikitext = convertHeaders(wikitext)
    wikitext = convertLists(wikitext)

    -- 3. Inline conversions (emphasis, links, images, code), one line at a
    --    time so no inline span can match across a line break. A newline is
    --    appended before splitting so each line is yielded exactly once.
    local lines = {}
    for line in (wikitext .. '\n'):gmatch('([^\n]*)\n') do
        line = convertEmphasis(line)
        line = convertLinksAndImages(line)
        line = convertInlineCode(line)
        lines[#lines + 1] = line
    end
    wikitext = table.concat(lines, '\n')

    -- 4. Collapse runs of blank lines into a single blank line. Paragraphs
    --    stay separated by one blank line; single newlines are left as they
    --    are and no <br /> tags are inserted.
    wikitext = wikitext:gsub('\n\n+', '\n\n')

    return wikitext
end
-- Return the export table so `p.markdown` is reachable from outside the file.
return p