Module:Deranize: Difference between revisions

From The Toaq Wiki
(fix arity check at top of deranize())
(reformat lookup table)
Line 23: Line 23:
}
}


local deranitable = {
local deranimap = {
   -- consonants
   -- consonants
   m     = "", -- DERANI LETTER MAMEI
   ["m"]  = "", -- DERANI LETTER MAMEI
   m_   = "", -- DERANI LETTER MAMEI CODA
   ["m_"] = "", -- DERANI LETTER MAMEI CODA
   b     = "", -- DERANI LETTER BUBUE
   ["b"]  = "", -- DERANI LETTER BUBUE
   p     = "", -- DERANI LETTER PIPOQ
   ["p"]  = "", -- DERANI LETTER PIPOQ
   f     = "", -- DERANI LETTER FOFUAQ
   ["f"]  = "", -- DERANI LETTER FOFUAQ
   n     = "", -- DERANI LETTER NANAQ
   ["n"]  = "", -- DERANI LETTER NANAQ
   d     = "", -- DERANI LETTER DUDEO
   ["d"]  = "", -- DERANI LETTER DUDEO
   t     = "", -- DERANI LETTER TITIEQ
   ["t"]  = "", -- DERANI LETTER TITIEQ
   z     = "", -- DERANI LETTER ZOZEO
   ["z"]  = "", -- DERANI LETTER ZOZEO
   c     = "", -- DERANI LETTER CECOA
   ["c"]  = "", -- DERANI LETTER CECOA
   s     = "", -- DERANI LETTER SAQSEOQ
   ["s"]  = "", -- DERANI LETTER SAQSEOQ
   r     = "", -- DERANI LETTER RAIRUA
   ["r"]  = "", -- DERANI LETTER RAIRUA
   l     = "", -- DERANI LETTER LAOLIQ
   ["l"]  = "", -- DERANI LETTER LAOLIQ
   nh   = "", -- DERANI LETTER NHANHOQ
   ["nh"] = "", -- DERANI LETTER NHANHOQ
   j     = "", -- DERANI LETTER JUJUO
   ["j"]  = "", -- DERANI LETTER JUJUO
   ch   = "", -- DERANI LETTER CHICHAO
   ["ch"] = "", -- DERANI LETTER CHICHAO
   sh   = "", -- DERANI LETTER SHOSHIA
   ["sh"] = "", -- DERANI LETTER SHOSHIA
   ["ꝡ"] = "", -- DERANI LETTER VEVA
   ["ꝡ"] = "", -- DERANI LETTER VEVA
   q_   = "", -- DERANI LETTER AQ-AQ
   ["q_"] = "", -- DERANI LETTER AQ-AQ
   g     = "", -- DERANI LETTER GUGUI
   ["g"]  = "", -- DERANI LETTER GUGUI
   k     = "", -- DERANI LETTER KIKUE
   ["k"]  = "", -- DERANI LETTER KIKUE
   ["'"] = "", -- DERANI LETTER O-AOMO
   ["'"] = "", -- DERANI LETTER O-AOMO
   h     = "", -- DERANI LETTER HEHAQ
   ["h"]  = "", -- DERANI LETTER HEHAQ
   -- vowels
   -- vowels
   a     = "", -- DERANI LETTER SAQSEOQ
   ["a"]  = "", -- DERANI LETTER SAQSEOQ
   ["ı"] = "", -- DERANI LETTER CECOA
   ["ı"] = "", -- DERANI LETTER CECOA
   u     = "", -- DERANI LETTER BUBUE
   ["u"]  = "", -- DERANI LETTER BUBUE
   o     = "", -- DERANI LETTER GUGUI
   ["o"]  = "", -- DERANI LETTER GUGUI
   e     = "", -- DERANI LETTER FOFUAQ
   ["e"]  = "", -- DERANI LETTER FOFUAQ
   -- tone marks
   -- tone marks
   [2]   = "", -- DERANI COMBINING RISING TONE
   [2]   = "", -- DERANI COMBINING RISING TONE
   [3]   = "", -- DERANI COMBINING LOW GLOTTAL TONE
   [3]   = "", -- DERANI COMBINING LOW GLOTTAL TONE
   [4]   = "", -- DERANI COMBINING RISING-FALLING TONE
   [4]   = "", -- DERANI COMBINING RISING-FALLING TONE
   -- TODO: the rest of the owl
   -- TODO: the rest of the owl
}
}
Line 98: Line 98:
     end
     end
     for _, glyph in ipairs(glyphs) do
     for _, glyph in ipairs(glyphs) do
       local mapped = deranitable[glyph]
       local mapped = deranimap[glyph]
       if mapped then
       if mapped then
       res[#res+1] = mapped
       res[#res+1] = mapped
Line 108: Line 108:
end
end


return {deranitable = deranitable, deranize = deranize}
return {deranize = deranize}

Revision as of 19:26, 18 December 2022

Use with Template:Deranize. {{deranize|mamıeq doe}} renders like so:   (Scrıpt error: The functıon "clean" does not exıst.).


-- either mw.ustring.toNFD or mw.ustring.match doesn't work properly, so resorting to this for the time being (lol)
--
-- untone[vowel] = {base vowel, tone number}
-- Maps every (possibly accented) vowel to its unaccented base letter and the
-- tone number (1-4) encoded by its diacritic: none = 1, acute = 2,
-- diaeresis = 3, circumflex = 4.
local untone = {}
do
  local base_vowels = {"a", "u", "ı", "o", "e"}
  -- Row index is the tone number; column order matches base_vowels.
  local vowels_by_tone = {
    {"a", "u", "ı", "o", "e"},
    {"á", "ú", "í", "ó", "é"},
    {"ä", "ü", "ï", "ö", "ë"},
    {"â", "û", "î", "ô", "ê"},
  }
  for tone_number, row in ipairs(vowels_by_tone) do
    for column, accented in ipairs(row) do
      untone[accented] = {base_vowels[column], tone_number}
    end
  end
end

-- Lookup table from a romanised glyph key to the Derani glyph string that
-- deranize() appends to its output.
--   * string keys are Latin letters / digraphs; a trailing "_" marks the coda
--     form (deranize() builds these keys as `coda .. "_"`)
--   * integer keys 2..4 are the tone numbers produced by `untone`; tone 1 has
--     no entry, so deranize() emits nothing for it
-- NOTE(review): the glyph literals look empty in this view; presumably they
-- hold private-use-area code points for a Derani font -- verify before editing.
local deranimap = {
  -- consonants
  ["m"]  = "", -- DERANI LETTER MAMEI
  ["m_"] = "", -- DERANI LETTER MAMEI CODA
  ["b"]  = "", -- DERANI LETTER BUBUE
  ["p"]  = "", -- DERANI LETTER PIPOQ
  ["f"]  = "", -- DERANI LETTER FOFUAQ
  ["n"]  = "", -- DERANI LETTER NANAQ
  ["d"]  = "", -- DERANI LETTER DUDEO
  ["t"]  = "", -- DERANI LETTER TITIEQ
  ["z"]  = "", -- DERANI LETTER ZOZEO
  ["c"]  = "", -- DERANI LETTER CECOA
  ["s"]  = "", -- DERANI LETTER SAQSEOQ
  ["r"]  = "", -- DERANI LETTER RAIRUA
  ["l"]  = "", -- DERANI LETTER LAOLIQ
  ["nh"] = "", -- DERANI LETTER NHANHOQ
  ["j"]  = "", -- DERANI LETTER JUJUO
  ["ch"] = "", -- DERANI LETTER CHICHAO
  ["sh"] = "", -- DERANI LETTER SHOSHIA
  ["ꝡ"]  = "", -- DERANI LETTER VEVA
  ["q_"] = "", -- DERANI LETTER AQ-AQ
  ["g"]  = "", -- DERANI LETTER GUGUI
  ["k"]  = "", -- DERANI LETTER KIKUE
  ["'"]  = "", -- DERANI LETTER O-AOMO
  ["h"]  = "", -- DERANI LETTER HEHAQ
  -- vowels
  ["a"]  = "", -- DERANI LETTER SAQSEOQ
  ["ı"]  = "", -- DERANI LETTER CECOA
  ["u"]  = "", -- DERANI LETTER BUBUE
  ["o"]  = "", -- DERANI LETTER GUGUI
  ["e"]  = "", -- DERANI LETTER FOFUAQ
  -- tone marks
  [2]    = "", -- DERANI COMBINING RISING TONE
  [3]    = "", -- DERANI COMBINING LOW GLOTTAL TONE
  [4]    = "", -- DERANI COMBINING RISING-FALLING TONE
  -- TODO: the rest of the owl
}

-- {{#invoke:Deranize|deranize|arg}} (TODO: turn this into a family friendly template)
--
-- Converts Latin-script Toaq text into Derani glyphs, one syllable at a time.
-- The input is lower-cased and every "i" is replaced by dotless "ı" first.
-- Any text between recognised syllables (spaces, punctuation, stray letters)
-- is copied to the output NFC-normalised.
--
-- @param frame  Scribunto frame; frame.args[1] is the text to convert.
-- @return       string: the mapped glyphs and pass-through text, concatenated.
-- Raises an assertion error when frame.args[1] is missing or frame.args[2] is present.
--
-- Declared `local` so the module does not leak a global; it is still exported
-- through the table returned at the end of the module.
local function deranize(frame)
  assert(frame.args[1] ~= nil and frame.args[2] == nil, "This function requires exactly one argument")
  local normalized = mw.ustring.gsub(mw.ustring.lower(frame.args[1]), "i", "ı")
  mw.log(normalized)
  local res = {}
  local ix = 1
  local len = mw.ustring.len(normalized)
  -- NB. this is not PCRE regex, see https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
  --              ↓ initpos (these ones eval to an index)                     precoda ↓        ↓ postcoda
  --                ↓ initial               ↓ medial                ↓ final             ↓ coda   ↓ lingeringvowel
  local toaqre = "()([mbpfndtzcsrljꝡgk']?h?)([auıoeáúíóéäüïöëâûîôê])([auıoe]?[auıoe]?)()([qm]?)()([auıoe]?)"
  -- `<=` (not `<`) so that a syllable or stray character sitting on the very
  -- last position of the input is still processed.
  while ix <= len do
    local initpos, initial, medial, final, precoda, coda, postcoda, lingeringvowel = mw.ustring.match(normalized, toaqre, ix)
    if initpos == nil then
      -- No further syllable: pass the remaining text through and stop.
      mw.log(ix .. ", no match")
      res[#res+1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, len))
      break
    end
    mw.log(table.concat({ix, initpos, initial, medial, final, precoda, coda, postcoda, lingeringvowel}, ", "))
    -- Copy whatever lies between the previous syllable and this one.
    res[#res+1] = mw.ustring.toNFC(mw.ustring.sub(normalized, ix, initpos - 1))
    if coda == "m" and lingeringvowel ~= "" then
      -- An "m" directly followed by a vowel is the onset of the next syllable,
      -- not the coda of this one: leave it for the next iteration.
      coda = ""
      ix = precoda
    else
      ix = postcoda
    end
    -- `medial` always has an entry: the pattern's vowel class lists exactly
    -- the keys of `untone`.
    local vowelinfo = untone[medial]
    local nucleus, tone = vowelinfo[1], vowelinfo[2]
    local glyphs = {initial, nucleus, tone}
    -- NOTE(review): the coda glyph is queued before the remaining vowels of
    -- the rime; confirm this is the order Derani expects.
    if coda ~= "" then
      glyphs[#glyphs+1] = coda .. "_"
    end
    for _, fin in ipairs(mw.text.split(final, "")) do
      glyphs[#glyphs+1] = fin
    end
    -- Keys without a mapping (empty initial, tone 1, empty final, ...) are skipped.
    for _, glyph in ipairs(glyphs) do
      local mapped = deranimap[glyph]
      if mapped then
        res[#res+1] = mapped
      end
    end
    -- TODO: actually implement derani hiatus symbol, o'aomo, etc. insertion logic here 🤪
  end
  return table.concat(res)
end

-- Module export table; `deranize` is the function named in
-- {{#invoke:Deranize|deranize|...}}.
local exports = {}
exports.deranize = deranize
return exports