Module:Deranize: Difference between revisions
(first draft of ⚠INCOMPLETE⚠ and frankly code-style-wise quite repulsive "deranization" function that we could use in a template to auto-deranize everything, who knows) |
(checking how frame.args actually works) |
||
Line 63: | Line 63: | ||
-- {{#invoke:Deranize|deranize|arg}} (TODO: turn this into a family friendly template) | -- {{#invoke:Deranize|deranize|arg}} (TODO: turn this into a family friendly template) | ||
function deranize(frame) | function deranize(frame) | ||
return #frame.args | |||
end | |||
function arsntersiontei() | |||
assert(#frame.args == 1, "This function requires exactly one argument") | assert(#frame.args == 1, "This function requires exactly one argument") | ||
local normalized = mw.ustring.gsub(mw.ustring.lower(frame.args[1]), "i", "ı") | local normalized = mw.ustring.gsub(mw.ustring.lower(frame.args[1]), "i", "ı") |
Revision as of 19:20, 18 December 2022
Use with Template:Deranize. {{deranize|mamıeq doe}}
renders like so: 0 (Scrıpt error: The functıon "clean" does not exıst.).
-- Maps each (possibly accented) vowel to {plain vowel, tone number}.
-- Written out by hand because decomposing via mw.ustring.toNFD and matching
-- with mw.ustring.match did not behave as expected (one of the two misbehaves),
-- so this lookup is the stop-gap for the time being.
local untone = {}
do
    -- Plain vowels, in the same column order as every row below.
    local plain = {"a", "u", "ı", "o", "e"}
    -- Row index == tone number; row 1 is the unmarked tone.
    local rows = {
        {"a", "u", "ı", "o", "e"},
        {"á", "ú", "í", "ó", "é"},
        {"ä", "ü", "ï", "ö", "ë"},
        {"â", "û", "î", "ô", "ê"},
    }
    for tone, row in ipairs(rows) do
        for column, letter in ipairs(row) do
            untone[letter] = {plain[column], tone}
        end
    end
end
-- Lookup table from a Latin-script glyph key to the string emitted for it.
-- Key conventions used by the conversion code in this module:
--   * plain letters / digraphs ("m", "nh", "ch", ...) are syllable-initial consonants or vowels;
--   * a trailing underscore ("m_", "q_") marks the coda form (the lookup key is built as coda .. "_");
--   * integer keys 2, 3 and 4 are tone numbers as stored in the second slot of each untone entry
--     (tone 1 has no entry, so nothing is emitted for it).
-- Keys that are missing from this table are silently skipped by the lookup loop.
-- NOTE(review): the replacement strings look empty in this view; presumably they are meant to hold
-- the Derani code points named in the trailing comments — confirm they survived copy/paste.
-- This table is also exported from the module as `deranitable`.
local deranitable = {
-- consonants
m = "", -- DERANI LETTER MAMEI
m_ = "", -- DERANI LETTER MAMEI CODA
b = "", -- DERANI LETTER BUBUE
p = "", -- DERANI LETTER PIPOQ
f = "", -- DERANI LETTER FOFUAQ
n = "", -- DERANI LETTER NANAQ
d = "", -- DERANI LETTER DUDEO
t = "", -- DERANI LETTER TITIEQ
z = "", -- DERANI LETTER ZOZEO
c = "", -- DERANI LETTER CECOA
s = "", -- DERANI LETTER SAQSEOQ
r = "", -- DERANI LETTER RAIRUA
l = "", -- DERANI LETTER LAOLIQ
nh = "", -- DERANI LETTER NHANHOQ
j = "", -- DERANI LETTER JUJUO
ch = "", -- DERANI LETTER CHICHAO
sh = "", -- DERANI LETTER SHOSHIA
["ꝡ"] = "", -- DERANI LETTER VEVA
q_ = "", -- DERANI LETTER AQ-AQ
g = "", -- DERANI LETTER GUGUI
k = "", -- DERANI LETTER KIKUE
["'"] = "", -- DERANI LETTER O-AOMO
h = "", -- DERANI LETTER HEHAQ
-- vowels (the glyph names below repeat consonant letter names from the section above)
a = "", -- DERANI LETTER SAQSEOQ
["ı"] = "", -- DERANI LETTER CECOA
u = "", -- DERANI LETTER BUBUE
o = "", -- DERANI LETTER GUGUI
e = "", -- DERANI LETTER FOFUAQ
-- tone marks (keyed by tone number, not by a string)
[2] = "", -- DERANI COMBINING RISING TONE
[3] = "", -- DERANI COMBINING LOW GLOTTAL TONE
[4] = "", -- DERANI COMBINING RISING-FALLING TONE
-- TODO: the rest of the owl
}
-- Converts one string of Latin-script Toaq into Derani glyphs.
--
-- The text is lowercased and every "i" is replaced by dotless "ı"; it is then
-- scanned syllable by syllable. Anything between recognised syllables (spaces,
-- punctuation, stray letters) is copied through unchanged apart from NFC
-- normalisation. Glyph keys without an entry in `deranitable` are skipped.
--
-- @param text string: the text to convert
-- @return string: the converted text
local function deranize_text(text)
    local normalized = mw.ustring.gsub(mw.ustring.lower(text), "i", "ı")
    mw.log(normalized)
    local res = {}
    local ix = 1
    local len = mw.ustring.len(normalized)
    -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
    -- Captures, in order:
    --   1 initpos         (position) start of the syllable
    --   2 initial         optional consonant, optionally followed by h
    --   3 medial          first vowel; the only one that may carry a tone diacritic
    --   4 final           up to two further plain vowels
    --   5 precoda         (position) just before the coda
    --   6 coda            optional q or m
    --   7 postcoda        (position) just after the coda
    --   8 lingeringvowel  plain vowel right after the coda, if any (lookahead only, never consumed)
    local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoeáúíóéäüïöëâûîôê])([auıoe]?[auıoe]?)()([qm]?)()([auıoe]?)"
    -- `<=` (not `<`) so that the last character, and one-character input, is processed.
    while ix <= len do
        local initpos, initial, medial, final, precoda, coda, postcoda, lingeringvowel =
            mw.ustring.match(normalized, toaqre, ix)
        if initpos == nil then
            -- No further syllable: keep the remaining text verbatim and stop.
            res[#res + 1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix))
            break
        end
        mw.log(table.concat({ix, initpos, initial, medial, final, precoda, coda, postcoda, lingeringvowel}, ", "))
        if initpos > ix then
            -- Pass through whatever sits between the previous syllable and this one.
            res[#res + 1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, initpos - 1))
        end
        if coda == "m" and lingeringvowel ~= "" then
            -- An "m" followed by a vowel is the initial of the next syllable, not a coda.
            coda = ""
            ix = precoda
        else
            ix = postcoda
        end
        local tonal = untone[medial]
        -- Emit in reading order: initial, nucleus, tone mark, trailing vowels, coda.
        local glyphs = {initial, tonal[1], tonal[2]}
        for _, fin in ipairs(mw.text.split(final, "")) do
            glyphs[#glyphs + 1] = fin
        end
        if coda ~= "" then
            glyphs[#glyphs + 1] = coda .. "_"
        end
        for _, glyph in ipairs(glyphs) do
            local mapped = deranitable[glyph]
            if mapped then
                res[#res + 1] = mapped
            end
        end
        -- TODO: actually implement derani hiatus symbol, o'aomo, etc. insertion logic here 🤪
    end
    return table.concat(res)
end

-- {{#invoke:Deranize|deranize|arg}} (TODO: turn this into a family friendly template)
--
-- #invoke entry point: converts the single positional argument to Derani.
--
-- @param frame table: Scribunto frame object; only frame.args[1] is read
-- @return string: the converted text
-- Raises an error unless exactly one positional argument was passed.
function deranize(frame)
    local args = frame.args
    -- The # operator does not work on frame.args (see the Scribunto manual), which is
    -- why `#frame.args` evaluated to 0; probe the positional slots directly instead.
    assert(args[1] ~= nil and args[2] == nil, "This function requires exactly one argument")
    return deranize_text(args[1])
end
-- Module export table: the glyph lookup table plus the #invoke entry point.
local exports = {}
exports.deranitable = deranitable
exports.deranize = deranize
return exports