689
edits
m (grammar) |
(rest of the owl) |
||
Line 1: | Line 1: | ||
-- Converts Latin text to Derani.
-- Supports: syllable splitting, tone, prefixes, hiatus/diphthong marks, punctuation.
-- For cartouches and null-raı, try: deranize("Ruaq sá [poq] sá* da.")
-- Short alias for Scribunto's Unicode-aware string library.
-- NOTE(review): this is assigned as a global, not a local; confirm nothing
-- outside this module relies on it before narrowing the scope.
u = mw.ustring

-- Returns the tone number of a Latin-script word, judged by which combining
-- mark appears anywhere in its NFD decomposition:
--   acute -> 2, diaeresis -> 3, circumflex -> 4, none of these -> 1.
-- The marks are tested in that order, so the first one listed wins when a
-- word carries more than one.
function get_tone(word)
    local decomposed = u.toNFD(word)
    local tone_marks = {
        { "́", 2 },
        { "̈", 3 },
        { "̂", 4 },
    }
    for _, entry in ipairs(tone_marks) do
        if u.find(decomposed, entry[1]) then
            return entry[2]
        end
    end
    return 1
end
-- Removes tone diacritics from a word and writes every "i" as dotless "ı".
--
-- The word is decomposed to NFD so the tone marks (combining acute,
-- diaeresis, circumflex) become separate code points that can be deleted.
-- The result is returned still in NFD form; it is not recomposed.
--
-- @param word  string to strip
-- @return      exactly one value: the stripped string
function strip_tone(word)
    local decomposed = u.toNFD(word)
    local toneless = u.gsub(decomposed, "[́̈̂]", "")
    -- gsub returns (string, count); the parentheses drop the count so it
    -- cannot leak into a caller's argument list or table constructor.
    return (u.gsub(toneless, "i", "ı"))
end
local deranimap = { | local deranimap = { | ||
Line 55: | Line 49: | ||
["e"] = "", -- DERANI LETTER FOFUAQ | ["e"] = "", -- DERANI LETTER FOFUAQ | ||
-- tone marks | -- tone marks | ||
[1] = "", | |||
[2] = "", -- DERANI COMBINING RISING TONE | [2] = "", -- DERANI COMBINING RISING TONE | ||
[3] = "", -- DERANI COMBINING LOW GLOTTAL TONE | [3] = "", -- DERANI COMBINING LOW GLOTTAL TONE | ||
[4] = "", -- DERANI COMBINING RISING-FALLING TONE | [4] = "", -- DERANI COMBINING RISING-FALLING TONE | ||
-- | -- prefix | ||
["-"] = "", | |||
["hiatus"] = "", | |||
["diphthong"] = "", | |||
[""] = "", | |||
} | |||
local derani_punctuation = { | |||
["\""] = "", | |||
[","] = " ", | |||
["."] = " ", | |||
["!"] = " ", | |||
["?"] = " ", | |||
["["] = "", | |||
["]"] = "", | |||
["*"] = " ", | |||
["_"] = "", | |||
} | } | ||
-- Converts a single whitespace-free chunk of Latin text to Derani glyphs.
--
-- The chunk is scanned syllable by syllable with the `toaqre` pattern. Text
-- the pattern skips over (before, between or after syllables) is copied
-- through in NFC form, so punctuation survives for the caller to translate.
-- The only caller in this module passes lowercased NFD text in which "i"
-- has already been replaced by "ı".
--
-- @param word  string to convert
-- @return      string made of deranimap glyphs plus the passed-through text
function deranize_word(word)
    local res = {}
    local ix = 1
    local tone = get_tone(word)
    local len = u.len(word)
    -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
    -- Captures, in order: start position, initial consonant, first vowel,
    -- underdot, tone diacritic, up to two further vowels, position before the
    -- coda, coda, hyphen/middle dot, position after them, and one look-ahead
    -- vowel.
    local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([auıoe]?)"
    local is_first_syllable = true
    while ix <= len do
        -- The tone diacritic capture is unused here: the tone of the whole
        -- word was already determined by get_tone above.
        local pos_init, initial, medial, underdot, _diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, lingering_vowel = u.match(word, toaqre, ix)
        if pos_init == nil then
            break
        end
        -- Copy through whatever lies between the previous syllable and this one.
        res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
        if coda == "m" and hyphen == "" and lingering_vowel ~= "" then
            -- An "m" directly followed by a vowel is the onset of the next
            -- syllable, not a coda: rewind so the next iteration re-reads it.
            -- A hyphen or middle dot in between means the "m" really is a
            -- coda; rewinding in that case would emit the prefix marker and
            -- then leak the raw "m-" text into the output.
            coda = ""
            ix = pos_precoda
        else
            ix = pos_postcoda
        end
        -- The tone glyph is only written on the first syllable of the word.
        local glyphs = {initial, tone, medial}
        if initial == "" and is_first_syllable then glyphs[1] = "'" end
        if not is_first_syllable then glyphs[2] = "" end
        if glyphs[1] == "'" or glyphs[1] == "ꝡ" then
            -- Move tone onto the first vowel
            glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
        end
        -- A marker glyph precedes the remaining vowels: "diphthong" for the
        -- four listed vowel pairs, "hiatus" for any other vowel sequence.
        local v = medial..final
        if v == "aı" or v == "ao" or v == "eı" or v == "oı" then
            glyphs[#glyphs+1] = "diphthong"
        elseif final ~= "" then
            glyphs[#glyphs+1] = "hiatus"
        end
        -- Splitting on the empty pattern yields the individual characters.
        for _, fin in ipairs(mw.text.split(final, "")) do
            glyphs[#glyphs+1] = fin
        end
        if coda ~= "" then
            -- Coda consonants are looked up under a distinct "<letter>_" key.
            glyphs[#glyphs+1] = coda .. "_"
        end
        if underdot ~= "" or hyphen ~= "" then
            -- Either an underdot or a trailing hyphen/middle dot produces
            -- the "-" (prefix) glyph.
            glyphs[#glyphs+1] = "-"
        end
        for _, glyph in ipairs(glyphs) do
            -- A key missing from deranimap yields nil, which appends nothing.
            res[#res+1] = deranimap[glyph]
        end
        is_first_syllable = false
    end
    -- Copy through anything left after the last syllable.
    res[#res+1] = u.toNFC(u.sub(word, ix, len))
    return table.concat(res)
end
-- Module entry point: converts the single argument of the invoking frame
-- from Latin text to Derani.
--
-- The input is NFD-decomposed, lowercased and has "i" rewritten as "ı".
-- Every run of non-whitespace characters is then passed through
-- deranize_word, and finally each key of derani_punctuation is replaced by
-- its mapped value.
--
-- @param frame  frame object; frame.args must hold exactly one argument
-- @return       the converted string
function deranize(frame)
    local args = frame.args
    assert(args[1] ~= nil and args[2] == nil, "This function requires exactly one argument")

    local decomposed = u.toNFD(args[1])
    local lowered = u.lower(decomposed)
    local text = u.gsub(lowered, "i", "ı")

    local converted = u.gsub(text, "(%S+)", deranize_word)
    for mark, replacement in pairs(derani_punctuation) do
        -- "%" escapes the punctuation character so it is matched literally.
        converted = u.gsub(converted, "%" .. mark, replacement)
    end
    return converted
end

return { deranize = deranize }