Module:Deranize: Difference between revisions

Update Neirani (replace vocalic pipoq with titieq)
(add diphthong mark in VF sequences)
(Update Neirani (replace vocalic pipoq with titieq))
 
(8 intermediate revisions by 2 users not shown)
Line 58: Line 58:
   ["diphthong"] = "󱛎",
   ["diphthong"] = "󱛎",
   [""] = "",
   [""] = "",
  -- neirani
  ["aı"] = "󱚶", -- DERANI LETTER DUDEO
  ["ao"] = "󱚷", -- DERANI LETTER TITIEQ
  ["eı"] = "󱚸", -- DERANI LETTER ZOZEO
  ["oı"] = "󱚽", -- DERANI LETTER NHANHOQ
}
}


Line 72: Line 77:
   ["_"] = "󱛛", -- compatibility nbsp
   ["_"] = "󱛛", -- compatibility nbsp
}
}
-- Set of consonant initials that deranize_word tests against the initial of
-- the *following* syllable. When the first syllable of a word has no initial,
-- no final vowel and no coda, and the next initial is in this set, an explicit
-- "'" glyph is written in the initial slot.
-- NOTE(review): presumably the Derani letters for these consonants resemble
-- the vowel glyphs, hence the name -- confirm against the font.
local vowel_lookalikes = { s = true, b = true, c = true, g = true, f = true }
-- Set of consonant initials whose Derani letters are reused as Neirani
-- diphthong glyphs. The glyph map writes "aı" with DUDEO (d), "ao" with
-- TITIEQ (t), "eı" with ZOZEO (z) and "oı" with NHANHOQ (nh), so this set must
-- list exactly those four initials. It is consulted only when the module-level
-- `neirani` flag is set: deranize_word then writes an explicit "'" glyph on a
-- vowel-initial first syllable that is directly followed by one of these
-- initials.
local diphthong_lookalikes = {
  ["d"] = true,  -- DUDEO, glyph of "aı"
  -- "ao" is written with TITIEQ, no longer with pipoq, so the initial to
  -- guard against is "t" rather than "p".
  ["t"] = true,  -- TITIEQ, glyph of "ao"
  ["z"] = true,  -- ZOZEO, glyph of "eı"
  ["nh"] = true, -- NHANHOQ, glyph of "oı"
}
-- Module-level mode flag for the Neirani variant. deranize() sets it to true
-- when a second frame argument is present; deranize_word() reads it to decide
-- whether the diphthongs aı/ao/eı/oı are emitted as single glyphs and whether
-- diphthong_lookalikes is consulted. Defaults to false.
local neirani = false


function deranize_word(word)
function deranize_word(word)
Line 79: Line 101:
   local len = mw.ustring.len(word)
   local len = mw.ustring.len(word)
   -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
   -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
   local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([auıoe]?)"
   local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([mbpfndtzcsrljꝡgk']?h?)([auıoe]?)"
   local is_first_syllable = true
   local is_first_syllable = true
   while ix <= len do
   while ix <= len do
     local shouldbreak = false
     local shouldbreak = false
     local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, lingering_vowel = mw.ustring.match(word, toaqre, ix)
     local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, next_initial, next_medial = mw.ustring.match(word, toaqre, ix)
     if pos_init == nil then
     if pos_init == nil then
       break
       break
     end
     end
     res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
     res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
     if coda == "m" and lingering_vowel ~= "" then
     if coda == "m" and next_initial == "" and next_medial ~= "" then
       coda = ""
       coda = ""
      next_initial = "m"
       ix = pos_precoda
       ix = pos_precoda
     else
     else
Line 95: Line 118:
     end
     end


    local v = medial..final
    local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı"
    if is_diphthong and neirani then medial, final = v, "" end
     local glyphs = {initial, tone, medial}
     local glyphs = {initial, tone, medial}
     if initial == "" and is_first_syllable then glyphs[1] = "'" end
     if initial == ""
      and is_first_syllable
      and (final == "" or (neirani and is_diphthong))
      and coda == ""
      and (vowel_lookalikes[next_initial] or (neirani and diphthong_lookalikes[next_initial])) then
      glyphs[1] = "'"
    end
     if not is_first_syllable then glyphs[2] = "" end
     if not is_first_syllable then glyphs[2] = "" end
     if glyphs[1] == "'" or glyphs[1] == "ꝡ" then
     if glyphs[1] == "" or glyphs[1] == "'" or glyphs[1] == "ꝡ" then
       -- Move tone onto the first vowel
       -- Move tone onto the first vowel
       glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
       glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
     end
     end
    local v = medial..final
     if is_diphthong and not neirani then
     if v == "aı" or v == "ao" or v == "eı" or v == "oı" then
       glyphs[#glyphs+1] = "diphthong"
       glyphs[#glyphs+1] = "diphthong"
     elseif final ~= "" then
     elseif final ~= "" then
       glyphs[#glyphs+1] = "hiatus"
       glyphs[#glyphs+1] = "hiatus"
     end
     end
     for j, fin in ipairs(mw.text.split(final, "")) do
     if neirani then
      if j > 1 then glyphs[#glyphs+1] = "diphthong" end
      glyphs[#glyphs+1] = final
      glyphs[#glyphs+1] = fin
    else
      for j, fin in ipairs(mw.text.split(final, "")) do
        if j > 1 then glyphs[#glyphs+1] = "diphthong" end
        glyphs[#glyphs+1] = fin
      end
     end
     end
     if coda ~= "" then
     if coda ~= "" then
Line 119: Line 154:
     end
     end
     for _, glyph in ipairs(glyphs) do
     for _, glyph in ipairs(glyphs) do
       res[#res+1] = deranimap[glyph]
       res[#res+1] = deranimap[glyph] or "(" .. glyph .. "?)"
     end
     end
     is_first_syllable = false
     is_first_syllable = false
Line 128: Line 163:


function deranize(frame)
function deranize(frame)
   assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument")
   assert(frame.args[1] ~= nil, "This function requires at least one argument")
  neirani = frame.args[2] ~= nil
   local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı")
   local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı")
   local converted = u.gsub(text, "(%S+)", deranize_word)
   local converted = u.gsub(text, "(%S+)", deranize_word)