More actions
No edit summary |
mNo edit summary |
||
(7 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
-- ================================================================= | |||
-- Module:Markdown | |||
-- Description: Converts a string of Markdown text into wikitext. | |||
-- Author: Gemini | |||
-- License: CC BY-SA 4.0 | |||
-- ================================================================= | |||
local p = {} | local p = {} | ||
-- | -- Helper function to trim whitespace from a string | ||
local function | local function trim(s) | ||
return s:match('^%s*(.-)%s*$') | |||
end | end | ||
-- | -- ================================================================= | ||
local function | -- Core Conversion Functions | ||
-- ================================================================= | |||
-- Function to strip any existing HTML tags from the input. | |||
-- This version uses a non-greedy match to be safer. | |||
local function stripHtml(text) | |||
-- The non-greedy pattern '<.->' matches a '<', then the fewest | |||
-- characters possible (.-) until it finds the next '>', preventing | |||
-- it from consuming large chunks of text if a tag is malformed. | |||
text = text:gsub('<.->', '') | |||
return text | |||
end | end | ||
local function decodeUnicode(text) | |||
function | return text:gsub("\\u(%x%x%x%x)", function(hex) | ||
if | local code = tonumber(hex, 16) | ||
if code and code < 256 then | |||
return string.char(code) | |||
else | |||
return "" -- skip or replace with placeholder if outside ASCII range | |||
end | |||
end) | |||
end | |||
-- Removes backslashes that escape markdown punctuation. | |||
local function handleEscapes(text) | |||
-- e.g., turns `\+` into `+` and `\*` into `*`. | |||
-- Wikitext generally doesn't require these characters to be escaped in prose. | |||
return text:gsub('\\([!%#%*+%-%._`%[%]()])', '%1') | |||
end | |||
-- Converts Markdown-style headers to wikitext headers. | |||
-- This function is designed to work on a single line of text. | |||
local function convertHeaders(text) | |||
for i = 6, 1, -1 do | |||
local h_md = string.rep('#', i) | |||
local h_wiki = string.rep('=', i) | |||
-- The pattern uses '^' to ensure it only matches at the start of the string (line). | |||
text = text:gsub('^' .. h_md .. '%s+(.-)%s*$', h_wiki .. ' %1 ' .. h_wiki) | |||
end | |||
return text | |||
end | |||
-- Converts bold and italic syntax. | |||
-- Handles nested cases by doing bold first. | |||
local function convertEmphasis(text) | |||
-- Bold: **text** or __text__ -> '''text''' | |||
text = text:gsub('%*%*([^\n%*]-)%*%*', "'''%1'''") | |||
text = text:gsub('__([^\n_]-)__', "'''%1'''") | |||
-- Italic: *text* or _text_ -> ''text'' | |||
text = text:gsub('%*([^\n%*]-)%*', "''%1''") | |||
text = text:gsub('_([^\n_]-)_', "''%1''") | |||
return text | |||
end | |||
-- Converts Markdown links and images. | |||
local function convertLinksAndImages(text) | |||
-- Images must be processed first:  -> [[File:url|alt]] | |||
text = text:gsub('!%[([^\n%]]*)%]%((.-)%)', '[[File:%2|%1]]') | |||
-- Links: [text](url) -> [url text] | |||
text = text:gsub('%[([^\n%]]+)%]%((.-)%)', '[%2 %1]') | |||
return text | |||
end | |||
-- Converts inline code blocks: `code` -> <code>code</code> | |||
local function convertInlineCode(text) | |||
return text:gsub('`([^`\n]+)`', '<code>%1</code>') | |||
end | |||
-- ================================================================= | |||
-- Main Public Function | |||
-- ================================================================= | |||
--- | |||
-- The main function to be called from wikitext. | |||
-- It takes a string of Markdown text and converts it to wikitext. | |||
-- | |||
-- Usage in wikitext: | |||
-- {{#invoke:Markdown|markdown|1={{{1}}}}} | |||
--- | |||
function p.markdown(frame) | |||
local inputText = frame.args[1] or frame.args.source or '' | |||
-- New 1 - Convert unicode back to tags. | |||
local decoded = decodeUnicode(inputText) | |||
-- 1. First, strip any HTML-like tags from the entire block of text. | |||
local text = stripHtml(decoded) | |||
local processedLines = {} | |||
-- 2. Process the text line by line | |||
for line in text:gmatch("([^\n]*)\n?") do | |||
local currentLine = trim(line) | |||
-- Apply transformations. The order of these operations is important. | |||
currentLine = handleEscapes(currentLine) | |||
currentLine = convertHeaders(currentLine) | |||
-- List conversion | |||
currentLine = currentLine:gsub('^%s*[%*%-]%s+', '* ') -- Unordered lists | |||
currentLine = currentLine:gsub('^%s*%d+%.%s+', '# ') -- Ordered lists | |||
-- Inline Transformations | |||
currentLine = convertEmphasis(currentLine) | |||
currentLine = convertLinksAndImages(currentLine) | |||
currentLine = convertInlineCode(currentLine) | |||
table.insert(processedLines, currentLine) | |||
end | |||
-- 3. Re-assemble the processed lines into a single string. | |||
local wikitext = table.concat(processedLines, '\n') | |||
-- 4. Handle paragraph breaks for proper wikitext rendering. | |||
-- Replace two or more consecutive newlines with just two (a single blank line). | |||
wikitext = wikitext:gsub('\n\n+', '\n\n') | |||
return wikitext | |||
end | end | ||
return p | return p |
Latest revision as of 17:57, 24 July 2025
Documentation for this module may be created at Module:MarkdownToWikitext/doc
-- =================================================================
-- Module:Markdown
-- Description: Converts a string of Markdown text into wikitext.
-- Author: Gemini
-- License: CC BY-SA 4.0
-- =================================================================
-- Export table: public functions are attached to `p`, which the module returns at the end of the file.
local p = {}
-- Returns `s` without its leading and trailing whitespace.
-- A string made only of whitespace (or an empty string) yields ''.
local function trim(s)
    -- Leading %s* is greedy, the capture is lazy, so the capture holds
    -- exactly the text between the outermost non-whitespace characters.
    local core = s:match('^%s*(.-)%s*$')
    return core
end
-- =================================================================
-- Core Conversion Functions
-- =================================================================
-- Deletes every tag-like span from `text` and returns the remaining string.
-- A span is a '<' followed by the shortest run of characters up to the next
-- '>' (lazy match), so a stray '<' can only swallow text up to the nearest
-- '>' rather than up to the last one. A '<' with no later '>' is left alone.
local function stripHtml(text)
    local tagPattern = '<.->'
    -- Keep only the first gsub result (the string); the match count is dropped.
    local withoutTags = text:gsub(tagPattern, '')
    return withoutTags
end
-- Decodes literal `\uXXXX` escape sequences (backslash, 'u', four hex digits)
-- into the UTF-8 encoding of the corresponding code point.
--
-- Previously only code points below 256 were emitted, each as a single raw
-- byte, which is not valid UTF-8 for 128..255; higher code points were
-- silently dropped. Now every Basic Multilingual Plane code point is encoded
-- properly. Surrogate halves (U+D800..U+DFFF) have no UTF-8 form on their own
-- and are still removed.
--
-- @param text string that may contain `\uXXXX` sequences
-- @return the decoded string, followed by the number of sequences processed
--         (the two results of string.gsub, as before)
local function decodeUnicode(text)
    return text:gsub("\\u(%x%x%x%x)", function(hex)
        -- The pattern guarantees four hex digits, so tonumber cannot fail.
        local code = tonumber(hex, 16)
        if code < 0x80 then
            -- ASCII: one byte.
            return string.char(code)
        elseif code < 0x800 then
            -- Two bytes: 110xxxxx 10xxxxxx.
            return string.char(
                0xC0 + math.floor(code / 0x40),
                0x80 + code % 0x40)
        elseif code >= 0xD800 and code <= 0xDFFF then
            -- Lone surrogate: not encodable, drop it.
            return ""
        else
            -- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
            return string.char(
                0xE0 + math.floor(code / 0x1000),
                0x80 + math.floor(code / 0x40) % 0x40,
                0x80 + code % 0x40)
        end
    end)
end
-- Drops the backslash in front of escaped Markdown punctuation, so `\+`
-- becomes `+` and `\*` becomes `*`. The characters recognised after the
-- backslash are: ! # * + - . _ ` [ ] ( )
-- Any other backslash is left untouched.
-- Returns the rewritten string followed by the number of escapes removed.
local function handleEscapes(text)
    local escapedPunctuation = '\\([!%#%*+%-%._`%[%]()])'
    -- '%1' re-inserts only the captured punctuation character.
    return text:gsub(escapedPunctuation, '%1')
end
-- Turns a Markdown ATX header into a wikitext header of the same level,
-- e.g. `## Title` -> `== Title ==`. Intended for a single line of text.
-- The input must start with one to six '#' characters followed by
-- whitespace; anything else is returned unchanged. Trailing whitespace
-- after the title is dropped.
local function convertHeaders(text)
    for level = 1, 6 do
        -- Anchored at both ends, so a hit always covers the whole string.
        -- Levels are mutually exclusive: after `level` hashes the next
        -- character has to be whitespace, never another '#'.
        local title = text:match('^' .. string.rep('#', level) .. '%s+(.-)%s*$')
        if title then
            local bars = string.rep('=', level)
            return bars .. ' ' .. title .. ' ' .. bars
        end
    end
    return text
end
-- Converts Markdown bold and italic markers to wikitext quotes.
--   **text** or __text__   -> '''text'''
--   *text*   or _text_     -> ''text''
--   ___text___             -> '''''text'''''
-- Bold is handled before italic so that the double markers are not consumed
-- as two single ones.
--
-- Underscore markers are only honoured at word boundaries: the opening run
-- must not follow a letter, digit or underscore, and the closing run must not
-- precede one. Without that guard identifiers such as `snake_case_name` and
-- URLs such as `http://x.org/a_b_c` were turned into italics. The explicit
-- triple-underscore rule keeps `___text___` producing bold+italic, which the
-- unguarded patterns used to produce in two steps.
--
-- NOTE(review): %w is presumably byte-based here, so bytes of non-ASCII
-- UTF-8 letters count as word boundaries -- confirm if that matters.
--
-- @param text a single line of text
-- @return the converted text
local function convertEmphasis(text)
    -- Bold with asterisks.
    text = text:gsub('%*%*([^\n%*]-)%*%*', "'''%1'''")
    -- Bold+italic, then bold, with underscores (word-boundary guarded).
    text = text:gsub('%f[%w_]___([^\n_]-)___%f[^%w_]', "'''''%1'''''")
    text = text:gsub('%f[%w_]__([^\n_]-)__%f[^%w_]', "'''%1'''")
    -- Italic with asterisks.
    text = text:gsub('%*([^\n%*]-)%*', "''%1''")
    -- Italic with underscores (word-boundary guarded).
    text = text:gsub('%f[%w_]_([^\n_]-)_%f[^%w_]', "''%1''")
    return text
end
-- Removes an optional Markdown link title from a link destination, e.g.
-- `http://x.org "Home"` -> `http://x.org`. A title is a trailing run in
-- double or single quotes separated from the URL by whitespace. Destinations
-- without such a title are returned unchanged.
local function stripLinkTitle(target)
    local url = target:match('^(.-)%s+"[^"]*"%s*$')
        or target:match("^(.-)%s+'[^']*'%s*$")
    return url or target
end

-- Converts Markdown images and links to wikitext.
--   ![alt](url)  -> [[File:url|alt]]
--   [text](url)  -> [url text]
-- Images are converted first; otherwise the link rule would match the
-- `[alt](url)` part of an image and leave a stray '!'.
--
-- An optional link title (`[text](url "title")`) is discarded. Previously it
-- stayed inside the destination and ended up in the wikitext link target.
--
-- Limitation kept from the original patterns: the destination ends at the
-- first ')' and the link text cannot contain ']'.
--
-- @param text a single line of text
-- @return the converted text
local function convertLinksAndImages(text)
    text = text:gsub('!%[([^\n%]]*)%]%((.-)%)', function(alt, target)
        return '[[File:' .. stripLinkTitle(target) .. '|' .. alt .. ']]'
    end)
    text = text:gsub('%[([^\n%]]+)%]%((.-)%)', function(label, target)
        return '[' .. stripLinkTitle(target) .. ' ' .. label .. ']'
    end)
    return text
end
-- Wraps Markdown inline code in <code> tags: `code` -> <code>code</code>.
-- A span is a pair of backticks on the same line with at least one
-- character between them; empty pairs are left as they are.
-- Returns the rewritten string followed by the number of spans converted.
local function convertInlineCode(text)
    local codeSpan = '`([^`\n]+)`'
    local wrapped = '<code>%1</code>'
    return text:gsub(codeSpan, wrapped)
end
-- =================================================================
-- Main Public Function
-- =================================================================
---
-- The main function to be called from wikitext.
-- Takes a string of Markdown text and converts it to wikitext.
--
-- Usage in wikitext:
-- {{#invoke:Markdown|markdown|1={{{1}}}}}
--
-- Pipeline: decode `\uXXXX` escapes, strip tag-like spans, then for each
-- line: trim, remove backslash escapes, convert headers, detect a list
-- marker, and run the inline conversions (emphasis, links/images, inline
-- code). Finally runs of blank lines are collapsed to a single blank line.
--
-- @param frame Scribunto frame; the Markdown is read from the first
--              positional argument, falling back to the `source` argument
-- @return the converted wikitext string
---
function p.markdown(frame)
    local inputText = frame.args[1] or frame.args.source or ''

    -- Turn `\uXXXX` escapes back into characters first, so that escaped
    -- angle brackets are seen (and removed) by the tag stripper below.
    local decoded = decodeUnicode(inputText)

    -- 1. Strip any HTML-like tags from the entire block of text.
    local text = stripHtml(decoded)

    local processedLines = {}

    -- 2. Process the text line by line.
    -- NOTE(review): on Lua 5.1 this pattern also yields one final empty
    -- match, so non-empty input produces output ending in a newline.
    for line in text:gmatch("([^\n]*)\n?") do
        local currentLine = trim(line)

        -- The order of these operations is important.
        -- NOTE(review): escapes are removed before the other conversions, so
        -- an escaped marker such as `\*` is still interpreted afterwards.
        currentLine = handleEscapes(currentLine)
        currentLine = convertHeaders(currentLine)

        -- List detection. The wikitext marker is kept apart from the item
        -- text until the inline conversions are done: with the marker
        -- already in place, `* item *emph*` had its leading '*' consumed by
        -- the italic rule and came out as `'' item ''emph*`.
        local marker, content = '', currentLine
        local rest = currentLine:match('^%s*[%*%-]%s+(.*)$') -- unordered
        if rest then
            marker, content = '* ', rest
        else
            rest = currentLine:match('^%s*%d+%.%s+(.*)$') -- ordered
            if rest then
                marker, content = '# ', rest
            end
        end

        -- Inline transformations (each call keeps only the string result).
        content = convertEmphasis(content)
        content = convertLinksAndImages(content)
        content = convertInlineCode(content)

        processedLines[#processedLines + 1] = marker .. content
    end

    -- 3. Re-assemble the processed lines into a single string.
    local wikitext = table.concat(processedLines, '\n')

    -- 4. Collapse two or more consecutive newlines into exactly two, i.e. a
    -- single blank line, which wikitext treats as a paragraph break.
    wikitext = wikitext:gsub('\n\n+', '\n\n')

    return wikitext
end
-- Expose the export table (and with it p.markdown) as the module's value.
return p