Module:Deranize: Difference between revisions
(reformat lookup table) |
(typosquat) |
||
Line 83: | Line 83: | ||
res[#res+1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, initpos - 1)) | res[#res+1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, initpos - 1)) | ||
if shouldbreak then break end | if shouldbreak then break end | ||
if coda == "m" and | if coda == "m" and lingeringvowel ~= "" then | ||
coda = "" | coda = "" | ||
ix = precoda | ix = precoda |
Revision as of 19:29, 18 December 2022
Use with Template:Deranize. {{deranize|mamıeq doe}}
renders like so: (Scrıpt error: The functıon "clean" does not exıst.).
-- Maps each vowel spelling (plain or tone-marked) to {plain vowel, tone number 1-4}.
-- Kept as an explicit lookup because either mw.ustring.toNFD or mw.ustring.match
-- doesn't work properly, so this is the workaround for the time being.
local untone = {}
do
	-- Column order shared by every row below.
	local plain = {"a", "u", "ı", "o", "e"}
	-- Row N holds the tone-N spelling of each plain vowel.
	local bytone = {
		{"a", "u", "ı", "o", "e"},
		{"á", "ú", "í", "ó", "é"},
		{"ä", "ü", "ï", "ö", "ë"},
		{"â", "û", "î", "ô", "ê"},
	}
	for tone, row in ipairs(bytone) do
		for col, spelled in ipairs(row) do
			untone[spelled] = {plain[col], tone}
		end
	end
end
-- Lookup from a romanized unit to the string emitted for it in the output.
-- Key kinds:
--   * onset consonants, including the digraphs "nh", "ch" and "sh";
--   * coda consonants, written with a trailing underscore ("m_", "q_");
--   * the five plain vowels;
--   * numeric tone keys 2, 3 and 4 (there is no entry for tone 1, so that
--     lookup yields nil).
-- NOTE(review): every value reads as an empty string in this view; presumably
-- the real glyphs are private-use code points that do not render -- confirm
-- before editing any of the literals.
-- NOTE(review): the vowel entries carry the same letter names as some of the
-- consonant entries; confirm that this is intentional and not a copy-paste slip.
local deranimap = {
-- consonants
["m"] = "", -- DERANI LETTER MAMEI
["m_"] = "", -- DERANI LETTER MAMEI CODA
["b"] = "", -- DERANI LETTER BUBUE
["p"] = "", -- DERANI LETTER PIPOQ
["f"] = "", -- DERANI LETTER FOFUAQ
["n"] = "", -- DERANI LETTER NANAQ
["d"] = "", -- DERANI LETTER DUDEO
["t"] = "", -- DERANI LETTER TITIEQ
["z"] = "", -- DERANI LETTER ZOZEO
["c"] = "", -- DERANI LETTER CECOA
["s"] = "", -- DERANI LETTER SAQSEOQ
["r"] = "", -- DERANI LETTER RAIRUA
["l"] = "", -- DERANI LETTER LAOLIQ
["nh"] = "", -- DERANI LETTER NHANHOQ
["j"] = "", -- DERANI LETTER JUJUO
["ch"] = "", -- DERANI LETTER CHICHAO
["sh"] = "", -- DERANI LETTER SHOSHIA
["ꝡ"] = "", -- DERANI LETTER VEVA
["q_"] = "", -- DERANI LETTER AQ-AQ
["g"] = "", -- DERANI LETTER GUGUI
["k"] = "", -- DERANI LETTER KIKUE
["'"] = "", -- DERANI LETTER O-AOMO
["h"] = "", -- DERANI LETTER HEHAQ
-- vowels
["a"] = "", -- DERANI LETTER SAQSEOQ
["ı"] = "", -- DERANI LETTER CECOA
["u"] = "", -- DERANI LETTER BUBUE
["o"] = "", -- DERANI LETTER GUGUI
["e"] = "", -- DERANI LETTER FOFUAQ
-- tone marks
[2] = "", -- DERANI COMBINING RISING TONE
[3] = "", -- DERANI COMBINING LOW GLOTTAL TONE
[4] = "", -- DERANI COMBINING RISING-FALLING TONE
-- TODO: the rest of the owl
}
-- {{#invoke:Deranize|deranize|arg}} (TODO: turn this into a family friendly template)
--
-- Converts romanized text into its Derani rendering.
--
-- The input is lower-cased and every "i" is replaced by dotless "ı". The text
-- is then scanned left to right for syllables. Text between syllables (and any
-- tail after the last one) is copied through unchanged (NFC-normalized); each
-- syllable is broken into units that are looked up in `deranimap`, and units
-- without an entry are skipped.
--
-- @param frame Scribunto frame object; `frame.args[1]` is the text to convert.
-- @return the converted string.
-- @raise assertion error unless exactly one positional argument is supplied.
--
-- NOTE(review): this function is declared as a global; it is left that way
-- here so the module's side effects stay unchanged.
function deranize(frame)
	assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument")
	local normalized = mw.ustring.gsub(mw.ustring.lower(frame.args[1]), "i", "ı")
	mw.log(normalized)
	local res = {}
	local ix = 1
	local len = mw.ustring.len(normalized)
	-- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
	-- Captures, in order:
	--   initpos        position where the syllable starts
	--   initial        optional onset consonant, optionally followed by "h"
	--   medial         first vowel, possibly tone-marked
	--   final          up to two further plain vowels
	--   precoda        position just before the coda
	--   coda           optional "q" or "m"
	--   postcoda       position just after the coda
	--   lingeringvowel optional plain vowel following the coda
	local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoeáúíóéäüïöëâûîôê])([auıoe]?[auıoe]?)()([qm]?)()([auıoe]?)"
	-- `<=` rather than `<`: a final single code point must still be processed.
	while ix <= len do
		local initpos, initial, medial, final, precoda, coda, postcoda, lingeringvowel = mw.ustring.match(normalized, toaqre, ix)
		if initpos == nil then
			-- No further syllable: copy the remaining text through and stop.
			-- (Previously this path did arithmetic on a nil `initpos`.)
			mw.log(ix)
			res[#res+1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, len))
			break
		end
		mw.log(table.concat({ix, initpos, initial, medial, final, precoda, coda, postcoda, lingeringvowel}, ", "))
		-- Copy through whatever sits between the previous syllable and this one.
		res[#res+1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, initpos - 1))
		if coda == "m" and lingeringvowel ~= "" then
			-- An "m" followed by a vowel is the onset of the next syllable,
			-- not a coda: drop it here and resume scanning at the "m".
			coda = ""
			ix = precoda
		else
			ix = postcoda
		end
		local nucleus, tone = unpack(untone[medial])
		local glyphs = {initial, nucleus, tone}
		-- NOTE(review): the coda unit is queued before the remaining vowels of
		-- the syllable; confirm this ordering is the intended one.
		if coda ~= "" then
			glyphs[#glyphs+1] = coda .. "_"
		end
		for _, fin in ipairs(mw.text.split(final, "")) do
			glyphs[#glyphs+1] = fin
		end
		for _, glyph in ipairs(glyphs) do
			local mapped = deranimap[glyph]
			if mapped then
				res[#res+1] = mapped
			end
		end
		-- TODO: actually implement derani hiatus symbol, o'aomo, etc. insertion logic here 🤪
	end
	return table.concat(res)
end
-- Module export table: makes `deranize` callable via {{#invoke:Deranize|deranize|...}}.
local exports = {}
exports.deranize = deranize
return exports