216
edits
m (discard gsub count) |
(Update Neirani (replace vocalic pipoq with titieq)) |
||
| (15 intermediate revisions by 2 users not shown) | |||
| Line 58: | Line 58: | ||
["diphthong"] = "", | ["diphthong"] = "", | ||
[""] = "", | [""] = "", | ||
-- neirani | |||
["aı"] = "", -- DERANI LETTER DUDEO | |||
["ao"] = "", -- DERANI LETTER TITIEQ | |||
["eı"] = "", -- DERANI LETTER ZOZEO | |||
["oı"] = "", -- DERANI LETTER NHANHOQ | |||
} | } | ||
local derani_punctuation = { | local derani_punctuation = { | ||
["\""] = "", | ["\""] = "", | ||
[":"] = "", | |||
[","] = " ", | [","] = " ", | ||
["."] = " ", | ["."] = " ", | ||
| Line 69: | Line 75: | ||
["]"] = "", | ["]"] = "", | ||
["*"] = " ", | ["*"] = " ", | ||
["_"] = "", | ["_"] = "", -- compatibility nbsp | ||
} | } | ||
local vowel_lookalikes = { | |||
["s"] = true, | |||
["b"] = true, | |||
["c"] = true, | |||
["g"] = true, | |||
["f"] = true, | |||
} | |||
local diphthong_lookalikes = { | |||
["d"] = true, | |||
["p"] = true, | |||
["z"] = true, | |||
["nh"] = true, | |||
} | |||
local neirani = false | |||
function deranize_word(word) | function deranize_word(word) | ||
| Line 78: | Line 101: | ||
local len = mw.ustring.len(word) | local len = mw.ustring.len(word) | ||
-- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | ||
local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([auıoe]?)" | local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([mbpfndtzcsrljꝡgk']?h?)([auıoe]?)" | ||
local is_first_syllable = true | local is_first_syllable = true | ||
while ix <= len do | while ix <= len do | ||
local shouldbreak = false | local shouldbreak = false | ||
local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, | local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, next_initial, next_medial = mw.ustring.match(word, toaqre, ix) | ||
if pos_init == nil then | if pos_init == nil then | ||
break | break | ||
end | end | ||
res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1)) | res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1)) | ||
if coda == "m" and | if coda == "m" and next_initial == "" and next_medial ~= "" then | ||
coda = "" | coda = "" | ||
next_initial = "m" | |||
ix = pos_precoda | ix = pos_precoda | ||
else | else | ||
| Line 94: | Line 118: | ||
end | end | ||
local v = medial..final | |||
local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı" | |||
if is_diphthong and neirani then medial, final = v, "" end | |||
local glyphs = {initial, tone, medial} | local glyphs = {initial, tone, medial} | ||
if initial == "" and is_first_syllable then glyphs[1] = "'" end | if initial == "" | ||
and is_first_syllable | |||
and (final == "" or (neirani and is_diphthong)) | |||
and coda == "" | |||
and (vowel_lookalikes[next_initial] or (neirani and diphthong_lookalikes[next_initial])) then | |||
glyphs[1] = "'" | |||
end | |||
if not is_first_syllable then glyphs[2] = "" end | if not is_first_syllable then glyphs[2] = "" end | ||
if glyphs[1] == "'" or glyphs[1] == "ꝡ" then | if glyphs[1] == "" or glyphs[1] == "'" or glyphs[1] == "ꝡ" then | ||
-- Move tone onto the first vowel | -- Move tone onto the first vowel | ||
glyphs[2], glyphs[3] = glyphs[3], glyphs[2] | glyphs[2], glyphs[3] = glyphs[3], glyphs[2] | ||
end | end | ||
if is_diphthong and not neirani then | |||
if | |||
glyphs[#glyphs+1] = "diphthong" | glyphs[#glyphs+1] = "diphthong" | ||
elseif final ~= "" then | elseif final ~= "" then | ||
glyphs[#glyphs+1] = "hiatus" | glyphs[#glyphs+1] = "hiatus" | ||
end | end | ||
for | if neirani then | ||
glyphs[#glyphs+1] = final | |||
else | |||
for j, fin in ipairs(mw.text.split(final, "")) do | |||
if j > 1 then glyphs[#glyphs+1] = "diphthong" end | |||
glyphs[#glyphs+1] = fin | |||
end | |||
end | end | ||
if coda ~= "" then | if coda ~= "" then | ||
| Line 117: | Line 154: | ||
end | end | ||
for _, glyph in ipairs(glyphs) do | for _, glyph in ipairs(glyphs) do | ||
res[#res+1] = deranimap[glyph] | res[#res+1] = deranimap[glyph] or "(" .. glyph .. "?)" | ||
end | end | ||
is_first_syllable = false | is_first_syllable = false | ||
| Line 126: | Line 163: | ||
function deranize(frame) | function deranize(frame) | ||
assert(frame.args[1] ~ | assert(frame.args[1] ~= nil, "This function requires at least one argument") | ||
neirani = frame.args[2] ~= nil | |||
local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı") | local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı") | ||
local converted = u.gsub(text, "(%S+)", deranize_word) | local converted = u.gsub(text, "(%S+)", deranize_word) | ||
| Line 137: | Line 175: | ||
function clean(frame) | function clean(frame) | ||
assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument") | assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument") | ||
local cleaned = u.gsub(frame.args[1], "[%[%]*]", "") | local cleaned = u.gsub(frame.args[1], "[:%[%]*]", "") | ||
local nbsp = string.char(0xC2, 0xA0) | |||
cleaned = u.gsub(cleaned, "_", nbsp) | |||
return cleaned | return cleaned | ||
end | end | ||
return {deranize = deranize, clean = clean} | return {deranize = deranize, clean = clean} | ||