Module:Deranize: Difference between revisions

From The Toaq Wiki
(Fix tone marks with implicit o'aomo)
(Update Neirani (replace vocalic pipoq with titieq))
 
(One intermediate revision by the same user not shown)
Line 58: Line 58:
   ["diphthong"] = "󱛎",
   ["diphthong"] = "󱛎",
   [""] = "",
   [""] = "",
   -- diphthong reform
   -- neirani
   ["aı"] = "󱚶", -- DERANI LETTER DUDEO
   ["aı"] = "󱚶", -- DERANI LETTER DUDEO
   ["ao"] = "󱚳", -- DERANI LETTER PIPOQ
   ["ao"] = "󱚷", -- DERANI LETTER TITIEQ
   ["eı"] = "󱚸", -- DERANI LETTER ZOZEO
   ["eı"] = "󱚸", -- DERANI LETTER ZOZEO
   ["oı"] = "󱚽", -- DERANI LETTER NHANHOQ
   ["oı"] = "󱚽", -- DERANI LETTER NHANHOQ
Line 93: Line 93:
}
}


local reform = false
local neirani = false


function deranize_word(word)
function deranize_word(word)
Line 120: Line 120:
     local v = medial..final
     local v = medial..final
     local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı"
     local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı"
     if is_diphthong and reform then medial, final = v, "" end
     if is_diphthong and neirani then medial, final = v, "" end
     local glyphs = {initial, tone, medial}
     local glyphs = {initial, tone, medial}
     if initial == "" and is_first_syllable then
     if initial == ""
       if (final == "" or (reform and is_diphthong))
      and is_first_syllable
        and coda == ""
       and (final == "" or (neirani and is_diphthong))
        and (vowel_lookalikes[next_initial] or (reform and diphthong_lookalikes[next_initial])) then
      and coda == ""
        glyphs[1] = "'"
      and (vowel_lookalikes[next_initial] or (neirani and diphthong_lookalikes[next_initial])) then
      else
      glyphs[1] = "'"
        glyphs[1] = ""
      end
     end
     end
     if not is_first_syllable then glyphs[2] = "" end
     if not is_first_syllable then glyphs[2] = "" end
     if glyphs[1] == "'" or glyphs[1] == "ꝡ" then
     if glyphs[1] == "" or glyphs[1] == "'" or glyphs[1] == "ꝡ" then
       -- Move tone onto the first vowel
       -- Move tone onto the first vowel
       glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
       glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
     end
     end
     if is_diphthong and not reform then
     if is_diphthong and not neirani then
       glyphs[#glyphs+1] = "diphthong"
       glyphs[#glyphs+1] = "diphthong"
     elseif final ~= "" then
     elseif final ~= "" then
       glyphs[#glyphs+1] = "hiatus"
       glyphs[#glyphs+1] = "hiatus"
     end
     end
     if reform then
     if neirani then
       glyphs[#glyphs+1] = final
       glyphs[#glyphs+1] = final
     else
     else
Line 166: Line 164:
function deranize(frame)
function deranize(frame)
   assert(frame.args[1] ~= nil, "This function requires at least one argument")
   assert(frame.args[1] ~= nil, "This function requires at least one argument")
   reform = frame.args[2] ~= nil
   neirani = frame.args[2] ~= nil
   local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı")
   local text = u.gsub(u.lower(u.toNFD(frame.args[1])), "i", "ı")
   local converted = u.gsub(text, "(%S+)", deranize_word)
   local converted = u.gsub(text, "(%S+)", deranize_word)

Latest revision as of 03:26, 27 September 2025

Use with Template:Deranize. {{deranize|mamıeq doe}} renders like so: 󱚰󱚺󱚰󱚹󱛍󱚴󱛂 󱚶󱛃󱛍󱚴 (mamıeq doe).


-- Converts Latin text to Derani.
-- Supports: syllable splitting, tone, prefixes, hiatus/diphthong marks, punctuation.
-- For cartouches and null-raı, try: deranize("Ruaq sá [poq] sá* da.")

u = mw.ustring

-- Returns the tone number of `word`, decided by which combining diacritic
-- its NFD form contains: acute -> 2, diaeresis -> 3, circumflex -> 4.
-- The marks are tested in that order and the first one found wins; a word
-- carrying none of them is tone 1.
function get_tone(word)
  local decomposed = u.toNFD(word)
  -- { combining mark, tone number }, in priority order
  local marks = {
    { "́", 2 },
    { "̈", 3 },
    { "̂", 4 },
  }
  for _, entry in ipairs(marks) do
    if u.find(decomposed, entry[1]) then
      return entry[2]
    end
  end
  return 1
end

-- Removes the tone diacritics (acute, diaeresis, circumflex) from the NFD
-- form of `word` and replaces every dotted "i" with dotless "ı".
-- Returns whatever the final gsub returns, i.e. the cleaned string followed
-- by the number of "i" replacements made.
function strip_tone(word)
  local decomposed = u.toNFD(word)
  local toneless = u.gsub(decomposed, "[́̈̂]", "")
  return u.gsub(toneless, "i", "ı")
end

-- Lookup table from the glyph keys produced by deranize_word to Derani
-- characters. Key conventions:
--   * a plain consonant or vowel string is that letter in initial/vowel position;
--   * a trailing "_" ("m_", "q_") is the coda form of the consonant;
--   * the numbers 1-4 are the tone numbers returned by get_tone;
--   * "-", "hiatus" and "diphthong" are marks, not letters;
--   * "" maps to "" so that empty slots in a syllable render as nothing;
--   * the two-letter vowel keys at the end are only looked up in neirani mode.
-- Each vowel shares its glyph with a consonant (compare the letter names in
-- the comments), which is why deranize_word needs the *_lookalikes tables.
local deranimap = {
  -- consonants
  ["m"]  = "󱚰", -- DERANI LETTER MAMEI
  ["m_"] = "󱚱", -- DERANI LETTER MAMEI CODA
  ["b"]  = "󱚲", -- DERANI LETTER BUBUE
  ["p"]  = "󱚳", -- DERANI LETTER PIPOQ
  ["f"]  = "󱚴", -- DERANI LETTER FOFUAQ
  ["n"]  = "󱚵", -- DERANI LETTER NANAQ
  ["d"]  = "󱚶", -- DERANI LETTER DUDEO
  ["t"]  = "󱚷", -- DERANI LETTER TITIEQ
  ["z"]  = "󱚸", -- DERANI LETTER ZOZEO
  ["c"]  = "󱚹", -- DERANI LETTER CECOA
  ["s"]  = "󱚺", -- DERANI LETTER SAQSEOQ
  ["r"]  = "󱚻", -- DERANI LETTER RAIRUA
  ["l"]  = "󱚼", -- DERANI LETTER LAOLIQ
  ["nh"] = "󱚽", -- DERANI LETTER NHANHOQ
  ["j"]  = "󱚾", -- DERANI LETTER JUJUO
  ["ch"] = "󱚿", -- DERANI LETTER CHICHAO
  ["sh"] = "󱛀", -- DERANI LETTER SHOSHIA
  ["ꝡ"]  = "󱛁", -- DERANI LETTER VEVA
  ["q_"] = "󱛂", -- DERANI LETTER AQ-AQ
  ["g"]  = "󱛃", -- DERANI LETTER GUGUI
  ["k"]  = "󱛄", -- DERANI LETTER KIKUE
  ["'"]  = "󱛅", -- DERANI LETTER O-AOMO
  ["h"]  = "󱛆", -- DERANI LETTER HEHAQ
  -- vowels (each reuses a consonant's glyph)
  ["a"]  = "󱚺", -- DERANI LETTER SAQSEOQ
  ["ı"]  = "󱚹", -- DERANI LETTER CECOA
  ["u"]  = "󱚲", -- DERANI LETTER BUBUE
  ["o"]  = "󱛃", -- DERANI LETTER GUGUI
  ["e"]  = "󱚴", -- DERANI LETTER FOFUAQ
  -- tone marks, keyed by tone number; tone 1 is unmarked
  [1]    = "",
  [2]    = "󱛊", -- DERANI COMBINING RISING TONE
  [3]    = "󱛋", -- DERANI COMBINING LOW GLOTTAL TONE
  [4]    = "󱛌", -- DERANI COMBINING RISING-FALLING TONE
  -- prefix mark, vowel-sequence marks, and the empty slot
  ["-"] = "󱛒",
  ["hiatus"] = "󱛍",
  ["diphthong"] = "󱛎",
  [""] = "",
  -- neirani: each diphthong is written as a single letter
  ["aı"] = "󱚶", -- DERANI LETTER DUDEO
  ["ao"] = "󱚷", -- DERANI LETTER TITIEQ
  ["eı"] = "󱚸", -- DERANI LETTER ZOZEO
  ["oı"] = "󱚽", -- DERANI LETTER NHANHOQ
}

-- Latin punctuation -> Derani punctuation. deranize applies these after the
-- words have been converted, using each key as a "%"-escaped pattern.
-- Several replacements deliberately begin with a space, so the Derani mark
-- is set off from the preceding word.
local derani_punctuation = {
  ["\""] = "󱛓", -- same glyph as ":"
  [":"] = "󱛓",
  [","] = " 󱛔",
  ["."] = " 󱛕",
  ["!"] = " 󱛖",
  ["?"] = " 󱛗",
  ["["] = "󱛘", -- opening bracket
  ["]"] = "󱛙", -- closing bracket
  ["*"] = " 󱛚",
  ["_"] = "󱛛", -- compatibility nbsp
}

-- Set of initial consonants whose Derani letter is the same glyph that
-- deranimap assigns to a plain vowel (s/a, b/u, c/ı, g/o, f/e).
-- deranize_word consults it to decide whether a vowel-initial first
-- syllable needs an explicit "'" written in front of it.
local vowel_lookalikes = { s = true, b = true, c = true, g = true, f = true }

-- Set of initial consonants whose Derani letter is the same glyph that
-- deranimap assigns to a neirani diphthong letter:
--   d / aı (DUDEO), t / ao (TITIEQ), z / eı (ZOZEO), nh / oı (NHANHOQ).
-- Only consulted by deranize_word when neirani mode is on.
-- "ao" is written with TITIEQ, not PIPOQ, so the entry here must be "t";
-- "p" no longer collides with any vowel glyph.
local diphthong_lookalikes = {
  ["d"] = true,
  ["t"] = true,
  ["z"] = true,
  ["nh"] = true,
}

-- Mode flag shared by the functions below. deranize sets it on every call
-- (true when a second argument is present); deranize_word reads it to write
-- the diphthongs aı/ao/eı/oı as single letters instead of vowel + mark + vowel.
local neirani = false

-- Converts one whitespace-free chunk of Latin-script text to Derani.
--
-- `word` is scanned syllable by syllable with the pattern below. Each
-- syllable becomes a list of glyph keys that are looked up in deranimap;
-- a key missing from the map is rendered as "(key?)". Text that the pattern
-- does not match (before, between or after syllables) is copied through
-- unchanged apart from NFC normalisation.
--
-- deranize passes lower-cased NFD text with dotless "ı"; the pattern relies
-- on that form (combining marks follow the first vowel).
--
-- Reads the module-level `neirani` flag. Returns the converted string.
function deranize_word(word)
  local res = {}
  local ix = 1
  -- One tone per word; it is only drawn on the first syllable.
  local tone = get_tone(word)
  local len = u.len(word)
  -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
  -- Captures, in order: start position, initial consonant, first vowel,
  -- underdot, tone diacritic, up to two further vowels, position before the
  -- coda, coda (q/m), hyphen or interpunct, position after them, and a
  -- lookahead at the next syllable's initial and first vowel.
  local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoe])([̣]?)([́̈̂̀]?)([auıoe]?[auıoe]?)()([qm]?)([-·]?)()([mbpfndtzcsrljꝡgk']?h?)([auıoe]?)"
  local is_first_syllable = true
  while ix <= len do
    local pos_init, initial, medial, underdot, diacritic, final, pos_precoda, coda, hyphen, pos_postcoda, next_initial, next_medial = u.match(word, toaqre, ix)
    if pos_init == nil then
      break
    end
    -- Anything skipped over to reach this syllable is emitted as-is.
    res[#res+1] = u.toNFC(u.sub(word, ix, pos_init - 1))
    if coda == "m" and hyphen == "" and next_initial == "" and next_medial ~= "" then
      -- "m" directly followed by a vowel is the next syllable's initial, not
      -- a coda: hand it back by resuming the scan in front of it.
      -- This must not happen when a hyphen/interpunct follows the "m":
      -- the scan would resume on "m-", which cannot start a syllable, and
      -- those characters would leak into the output as raw text.
      coda = ""
      next_initial = "m"
      ix = pos_precoda
    else
      ix = pos_postcoda
    end

    local v = medial..final
    local is_diphthong = v == "aı" or v == "ao" or v == "eı" or v == "oı"
    -- In neirani mode a diphthong is a single letter in the vowel slot.
    if is_diphthong and neirani then medial, final = v, "" end
    local glyphs = {initial, tone, medial}
    -- A bare vowel at the start of a word, followed by a consonant whose
    -- letter looks like a vowel, gets an explicit "'" in front of it.
    if initial == ""
      and is_first_syllable
      and (final == "" or (neirani and is_diphthong))
      and coda == ""
      and (vowel_lookalikes[next_initial] or (neirani and diphthong_lookalikes[next_initial])) then
      glyphs[1] = "'"
    end
    if not is_first_syllable then glyphs[2] = "" end
    if glyphs[1] == "" or glyphs[1] == "'" or glyphs[1] == "ꝡ" then
      -- Move tone onto the first vowel
      glyphs[2], glyphs[3] = glyphs[3], glyphs[2]
    end
    if is_diphthong and not neirani then
      glyphs[#glyphs+1] = "diphthong"
    elseif final ~= "" then
      glyphs[#glyphs+1] = "hiatus"
    end
    if neirani then
      -- `final` is "", a single vowel, or a two-vowel key of deranimap.
      glyphs[#glyphs+1] = final
    else
      -- One glyph per remaining vowel, with a diphthong mark between them.
      -- Splitting "" yields a single "" entry, which deranimap maps to "".
      for j, fin in ipairs(mw.text.split(final, "")) do
        if j > 1 then glyphs[#glyphs+1] = "diphthong" end
        glyphs[#glyphs+1] = fin
      end
    end
    if coda ~= "" then
      glyphs[#glyphs+1] = coda .. "_"
    end
    -- An underdot on the vowel or a trailing hyphen/interpunct both produce
    -- the prefix mark.
    if underdot ~= "" or hyphen ~= "" then
      glyphs[#glyphs+1] = "-"
    end
    for _, glyph in ipairs(glyphs) do
      res[#res+1] = deranimap[glyph] or "(" .. glyph .. "?)"
    end
    is_first_syllable = false
  end
  -- Trailing text after the last syllable.
  res[#res+1] = u.toNFC(u.sub(word, ix, len))
  return table.concat(res)
end

-- Entry point for {{#invoke}}: converts the Latin text in frame.args[1] to
-- Derani. The mere presence of a second argument (any non-nil value)
-- switches on neirani mode.
-- The text is NFD-decomposed, lower-cased and has "i" replaced by "ı";
-- every run of non-whitespace is then passed through deranize_word, and
-- finally the punctuation listed in derani_punctuation is substituted.
-- Raises an assertion error when no first argument is given.
function deranize(frame)
  local input = frame.args[1]
  assert(input ~= nil, "This function requires at least one argument")
  neirani = frame.args[2] ~= nil
  local normalized = u.lower(u.toNFD(input))
  local text = u.gsub(normalized, "i", "ı")
  local converted = u.gsub(text, "(%S+)", deranize_word)
  for mark, glyph in pairs(derani_punctuation) do
    -- "%" escapes the punctuation character so it is matched literally
    converted = u.gsub(converted, "%" .. mark, glyph)
  end
  return converted
end

-- Entry point for {{#invoke}}: returns frame.args[1] with the markup
-- characters ":", "[", "]" and "*" removed and every "_" turned into a
-- UTF-8 no-break space.
-- Raises an assertion error unless exactly one argument is supplied.
function clean(frame)
  local args = frame.args
  assert(args[1] ~= nil and args[2] == nil, "This function requires exactly one argument")
  local nbsp = string.char(0xC2, 0xA0)
  local without_markup = u.gsub(args[1], "[:%[%]*]", "")
  local cleaned = u.gsub(without_markup, "_", nbsp)
  return cleaned
end

return {deranize = deranize, clean = clean}