Module:Deranize: Difference between revisions

Update Neirani (replace vocalic pipoq with titieq)
m (test)
(Update Neirani (replace vocalic pipoq with titieq))
 
(3 intermediate revisions by the same user not shown)
Line 58: Line 58:
   ["diphthong"] = "󱛎",
   ["diphthong"] = "󱛎",
   [""] = "",
   [""] = "",
   -- diphthong reform
   -- neirani
   ["aı"] = "󱚶", -- DERANI LETTER DUDEO
   ["aı"] = "󱚶", -- DERANI LETTER DUDEO
   ["ao"] = "󱚳", -- DERANI LETTER PIPOQ
   ["ao"] = "󱚷", -- DERANI LETTER TITIEQ
   ["eı"] = "󱚸", -- DERANI LETTER ZOZEO
   ["eı"] = "󱚸", -- DERANI LETTER ZOZEO
   ["oı"] = "󱚽", -- DERANI LETTER NHANHOQ
   ["oı"] = "󱚽", -- DERANI LETTER NHANHOQ
Line 78: Line 78:
}
}


local reform = false
-- Set of consonant initials, keyed by their romanized spelling.
-- deranize_word looks up the *next* syllable's initial in this set: when the
-- current syllable is the first of the word, has no initial of its own, no
-- coda, and no second vowel (or is a diphthong in neirani mode), a hit here
-- makes it emit an explicit "'" glyph in the initial slot.
-- NOTE(review): the name suggests these consonants' glyphs resemble vowel
-- glyphs, so the "'" disambiguates the reading -- confirm against the script.
local vowel_lookalikes = {
  ["s"] = true,
  ["b"] = true,
  ["c"] = true,
  ["g"] = true,
  ["f"] = true,
}
 
-- Set of consonant initials, keyed by their romanized spelling, that
-- deranize_word consults only when the module-level `neirani` flag is set.
-- It is checked against the *next* syllable's initial, alongside
-- vowel_lookalikes, when deciding whether an initial-less first syllable
-- gets an explicit "'" glyph.
-- NOTE(review): "d", "z" and "nh" line up with the diphthong letters DUDEO,
-- ZOZEO and NHANHOQ in the glyph table; "ao" is mapped to TITIEQ there, yet
-- this set lists "p" (PIPOQ) -- confirm whether "t" is intended instead.
local diphthong_lookalikes = {
  ["d"] = true,
  ["p"] = true,
  ["z"] = true,
  ["nh"] = true,
}
 
local neirani = false


function deranize_word(word)
function deranize_word(word)
Line 86: Line 101:
   local len = mw.ustring.len(word)
   local len = mw.ustring.len(word)
   -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
   -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
   local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([auıoe]?)"
   local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([mbpfndtzcsrljꝡgk']?h?)([auıoe]?)"
   local is_first_syllable = true
   local is_first_syllable = true
   while ix <= len do
   while ix <= len do
     local shouldbreak = false
     local shouldbreak = false
     local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, lingering_vowel = mw.ustring.match(word, toaqre, ix)
     local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, next_initial, next_medial = mw.ustring.match(word, toaqre, ix)
     if pos_init == nil then
     if pos_init == nil then
       break
       break
     end
     end
     res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
     res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
     if coda == "m" and lingering_vowel ~= "" then
     if coda == "m" and next_initial == "" and next_medial ~= "" then
       coda = ""
       coda = ""
      next_initial = "m"
       ix = pos_precoda
       ix = pos_precoda
     else
     else
Line 104: Line 120:
     local v = medial..final
     local v = medial..final
     local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı"
     local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı"
     if is_diphthong and reform then medial, final = v, "" end
     if is_diphthong and neirani then medial, final = v, "" end
     local glyphs = {initial, tone, medial}
     local glyphs = {initial, tone, medial}
     if initial == "" and is_first_syllable then glyphs[1] = "'" end
     if initial == ""
      and is_first_syllable
      and (final == "" or (neirani and is_diphthong))
      and coda == ""
      and (vowel_lookalikes[next_initial] or (neirani and diphthong_lookalikes[next_initial])) then
      glyphs[1] = "'"
    end
     if not is_first_syllable then glyphs[2] = "" end
     if not is_first_syllable then glyphs[2] = "" end
     if glyphs[1] == "'" or glyphs[1] == "ꝡ" then
     if glyphs[1] == "" or glyphs[1] == "'" or glyphs[1] == "ꝡ" then
       -- Move tone onto the first vowel
       -- Move tone onto the first vowel
       glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
       glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
     end
     end
     if is_diphthong and not reform then
     if is_diphthong and not neirani then
       glyphs[#glyphs+1] = "diphthong"
       glyphs[#glyphs+1] = "diphthong"
     elseif final ~= "" then
     elseif final ~= "" then
       glyphs[#glyphs+1] = "hiatus"
       glyphs[#glyphs+1] = "hiatus"
     end
     end
     if reform then
     if neirani then
       glyphs[#glyphs+1] = final
       glyphs[#glyphs+1] = final
     else
     else
Line 142: Line 164:
function deranize(frame)
function deranize(frame)
   assert(frame.args[1] ~= nil, "This function requires at least one argument")
   assert(frame.args[1] ~= nil, "This function requires at least one argument")
   reform = frame.args[2] ~= nil
   neirani = frame.args[2] ~= nil
   local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı")
   local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı")
   local converted = u.gsub(text, "(%S+)", deranize_word)
   local converted = u.gsub(text, "(%S+)", deranize_word)