Module:Deranize: Difference between revisions
m (clean :s) |
(clean up Derani nbsp) |
||
Line 70: | Line 70: | ||
["]"] = "", | ["]"] = "", | ||
["*"] = " ", | ["*"] = " ", | ||
["_"] = "", | ["_"] = "", -- compatibility nbsp | ||
} | } | ||
Line 139: | Line 139: | ||
assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument") | assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument") | ||
local cleaned = u.gsub(frame.args[1], "[:%[%]*]", "") | local cleaned = u.gsub(frame.args[1], "[:%[%]*]", "") | ||
cleaned = u.gsub(frame.args[1], "_", "\xa0") | |||
return cleaned | return cleaned | ||
end | end | ||
return {deranize = deranize, clean = clean} | return {deranize = deranize, clean = clean} |
Revision as of 14:33, 3 May 2024
Use with Template:Deranize. {{deranize|mamıeq doe}}
renders like so: (mamıeq doe).
-- Converts Latin text to Derani.
-- Supports: syllable splitting, tone, prefixes, hiatus/diphthong marks, punctuation.
-- For cartouches and null-raı, try: deranize("Ruaq sá [poq] sá* da.")
-- Shorthand for Scribunto's Unicode-aware string library. Declared local so
-- the module does not leak a global named `u` into the shared environment.
local u = mw.ustring
--- Determines the tone number of a word from its combining diacritics.
-- The word is decomposed (NFD) first so that tone marks become separate
-- combining characters that can be searched for directly.
-- @param word string to inspect
-- @return 2 for an acute accent, 3 for a diaeresis, 4 for a circumflex
--         (checked in that order), or 1 when none of them is present
function get_tone(word)
	local decomposed = u.toNFD(word)
	-- Ordered list: the first mark found wins.
	local tone_marks = {
		{ "́", 2 },
		{ "̈", 3 },
		{ "̂", 4 },
	}
	for _, entry in ipairs(tone_marks) do
		if u.find(decomposed, entry[1]) then
			return entry[2]
		end
	end
	return 1
end
--- Removes tone diacritics from a word and turns dotted "i" into dotless "ı".
-- The word is decomposed (NFD) first so the tone marks are standalone
-- combining characters; the result is left in decomposed form.
-- @param word string to strip
-- @return the toneless string, followed by the substitution count of the
--         final gsub (both results of that call are passed through)
function strip_tone(word)
	local decomposed = u.toNFD(word)
	local toneless = u.gsub(decomposed, "[́̈̂]", "")
	return u.gsub(toneless, "i", "ı")
end
-- Lookup table from Latin-side tokens to Derani glyph strings; deranize_word
-- indexes it with every glyph key it emits.
-- Key kinds:
--   * consonant and vowel letters (including the digraphs "nh", "ch", "sh");
--   * coda forms, written as the letter followed by "_" ("m_", "q_");
--   * the numbers 1-4, the tone values produced by get_tone;
--   * the structural markers "-", "hiatus", "diphthong" and the empty key.
-- NOTE(review): the glyph values display as empty strings in this view; they
-- are presumably Private Use Area code points for a Derani font -- confirm
-- they survive copy/paste before editing this table.
local deranimap = {
-- consonants
["m"] = "", -- DERANI LETTER MAMEI
["m_"] = "", -- DERANI LETTER MAMEI CODA
["b"] = "", -- DERANI LETTER BUBUE
["p"] = "", -- DERANI LETTER PIPOQ
["f"] = "", -- DERANI LETTER FOFUAQ
["n"] = "", -- DERANI LETTER NANAQ
["d"] = "", -- DERANI LETTER DUDEO
["t"] = "", -- DERANI LETTER TITIEQ
["z"] = "", -- DERANI LETTER ZOZEO
["c"] = "", -- DERANI LETTER CECOA
["s"] = "", -- DERANI LETTER SAQSEOQ
["r"] = "", -- DERANI LETTER RAIRUA
["l"] = "", -- DERANI LETTER LAOLIQ
["nh"] = "", -- DERANI LETTER NHANHOQ
["j"] = "", -- DERANI LETTER JUJUO
["ch"] = "", -- DERANI LETTER CHICHAO
["sh"] = "", -- DERANI LETTER SHOSHIA
["ꝡ"] = "", -- DERANI LETTER VEVA
["q_"] = "", -- DERANI LETTER AQ-AQ
["g"] = "", -- DERANI LETTER GUGUI
["k"] = "", -- DERANI LETTER KIKUE
["'"] = "", -- DERANI LETTER O-AOMO
["h"] = "", -- DERANI LETTER HEHAQ
-- vowels
["a"] = "", -- DERANI LETTER SAQSEOQ
["ı"] = "", -- DERANI LETTER CECOA
["u"] = "", -- DERANI LETTER BUBUE
["o"] = "", -- DERANI LETTER GUGUI
["e"] = "", -- DERANI LETTER FOFUAQ
-- tone marks
[1] = "",
[2] = "", -- DERANI COMBINING RISING TONE
[3] = "", -- DERANI COMBINING LOW GLOTTAL TONE
[4] = "", -- DERANI COMBINING RISING-FALLING TONE
-- prefix
["-"] = "",
["hiatus"] = "",
["diphthong"] = "",
[""] = "",
}
-- Punctuation substitutions that deranize applies after word conversion:
-- every occurrence of a key character in the converted text is replaced by
-- its value. Each key is a single punctuation character, which lets deranize
-- build the search pattern by prefixing the key with "%".
-- NOTE(review): several values display as empty strings in this view; they
-- may be invisible Private Use Area glyphs rather than truly empty -- confirm
-- before editing.
local derani_punctuation = {
["\""] = "",
[":"] = "",
[","] = " ",
["."] = " ",
["!"] = " ",
["?"] = " ",
["["] = "",
["]"] = "",
["*"] = " ",
["_"] = "", -- compatibility nbsp
}
-- Converts one whitespace-free chunk of Latin text into Derani glyphs.
-- The chunk is scanned syllable by syllable with the `toaqre` pattern; text
-- lying between syllable matches is copied through (NFC-normalised), and each
-- matched syllable is turned into a list of keys that are looked up in
-- `deranimap`.
-- deranize calls this with text that is already NFD-decomposed, lower-cased
-- and has "i" replaced by "ı".
-- @param word the chunk to convert
-- @return the converted string
function deranize_word(word)
-- Output fragments, joined once at the end.
local res = {}
-- Character index (not byte index) where the next match attempt starts.
local ix = 1
-- One tone value for the whole chunk; only emitted on the first syllable.
local tone = get_tone(word)
local len = mw.ustring.len(word)
-- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
-- Captures, in order: match start position, optional initial consonant
-- (with optional trailing "h"), first vowel, optional combining underdot,
-- optional combining tone diacritic, up to two further vowels, position
-- before the coda, optional coda "q"/"m", optional "-" or "·", position
-- after it, and an optional vowel that follows.
local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([auıoe]?)"
local is_first_syllable = true
while ix <= len do
-- NOTE(review): `shouldbreak` is never read below; looks like a leftover.
local shouldbreak = false
-- NOTE(review): the `diacritic` capture is not used afterwards; the tone
-- comes from get_tone(word) instead.
local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, lingering_vowel = mw.ustring.match(word, toaqre, ix)
if pos_init == nil then
-- No further syllable; the remaining tail is appended after the loop.
break
end
-- Pass through whatever sits between the previous syllable and this one.
res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
if coda == "m" and lingering_vowel ~= "" then
-- An "m" directly followed by a vowel is not a coda: drop it here and
-- resume scanning at the "m" so it can start the next syllable.
coda = ""
ix = pos_precoda
else
-- Resume after the coda and optional hyphen/middle dot; the lingering
-- vowel itself is not consumed.
ix = pos_postcoda
end
-- Key sequence for this syllable: initial, tone, first vowel.
local glyphs = {initial, tone, medial}
-- A chunk-initial syllable with no consonant gets the "'" letter.
if initial == "" and is_first_syllable then glyphs[1] = "'" end
-- Only the first syllable carries the tone; "" maps to nothing in deranimap.
if not is_first_syllable then glyphs[2] = "" end
if glyphs[1] == "'" or glyphs[1] == "ꝡ" then
-- Move tone onto the first vowel
glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
end
-- Vowel cluster: these four pairs get the diphthong marker, any other
-- multi-vowel sequence gets the hiatus marker.
local v = medial..final
if v == "aı" or v == "ao" or v == "eı" or v == "oı" then
glyphs[#glyphs+1] = "diphthong"
elseif final ~= "" then
glyphs[#glyphs+1] = "hiatus"
end
-- Append each remaining vowel as its own key (splitting on the empty
-- pattern is expected to yield individual characters -- see the
-- Scribunto mw.text.split documentation).
for _, fin in ipairs(mw.text.split(final, "")) do
glyphs[#glyphs+1] = fin
end
if coda ~= "" then
-- Coda letters use the "_"-suffixed keys of deranimap.
glyphs[#glyphs+1] = coda .. "_"
end
if underdot ~= "" or hyphen ~= "" then
-- Either an underdot or a trailing hyphen/middle dot emits the "-" key.
glyphs[#glyphs+1] = "-"
end
for _, glyph in ipairs(glyphs) do
-- Keys missing from deranimap yield nil, which leaves `res` unchanged.
res[#res+1] = deranimap[glyph]
end
is_first_syllable = false
end
-- Copy through whatever follows the last matched syllable.
res[#res+1] = u.toNFC(u.sub(word, ix, len))
return table.concat(res)
end
--- Module entry point: converts Latin text to Derani.
-- The single argument is NFD-decomposed, lower-cased and has "i" replaced by
-- "ı"; every run of non-whitespace characters is then converted by
-- deranize_word, and finally each key of derani_punctuation is replaced by
-- its value.
-- @param frame invocation frame; frame.args must hold exactly one argument
-- @return the converted text
function deranize(frame)
	local args = frame.args
	assert(args[1] ~= nil and args[2] == nil, "This function requires exactly one argument")

	-- Normalise step by step; each assignment keeps only the string result.
	local normalized = u.toNFD(args[1])
	normalized = u.lower(normalized)
	normalized = u.gsub(normalized, "i", "ı")

	local output = u.gsub(normalized, "(%S+)", deranize_word)

	-- Keys are single punctuation characters, so a leading "%" escapes them.
	for mark, glyph in pairs(derani_punctuation) do
		output = u.gsub(output, "%" .. mark, glyph)
	end
	return output
end
--- Module entry point: turns Derani-annotated Latin input into plain text.
-- Removes the characters ":", "[", "]" and "*" and replaces every "_" with a
-- no-break space (U+00A0).
-- @param frame invocation frame; frame.args must hold exactly one argument
-- @return the cleaned text (a single string)
function clean(frame)
	local args = frame.args
	assert(args[1] ~= nil and args[2] == nil, "This function requires exactly one argument")

	-- First pass: drop the markup characters.
	local cleaned = u.gsub(args[1], "[:%[%]*]", "")

	-- Second pass works on `cleaned`, not on the raw argument, so the first
	-- pass is not thrown away. U+00A0 is spelled as its UTF-8 bytes because
	-- Lua 5.1 has no "\x" escape and a lone 0xA0 byte is not valid UTF-8.
	cleaned = u.gsub(cleaned, "_", "\194\160")

	-- Assigning to a local keeps only the string, not gsub's match count.
	return cleaned
end
return {deranize = deranize, clean = clean}