-- Module:Sandbox/Erutuon/X-SAMPA

local p = {}

-- Short local aliases for the mw.ustring helpers used by this module.
local U, gsub, sub, find, length =
	mw.ustring.char, mw.ustring.gsub, mw.ustring.sub, mw.ustring.find, mw.ustring.len

-- Backslashes \, apostrophes ', and double quotes " are escaped with \.
-- \\ = \, \' = ', \" = "

-- X-SAMPA -> IPA lookup table.
-- Each key is an X-SAMPA sequence (at most four characters long); each value
-- is a table with these fields:
--   [1]            the IPA output for the sequence
--   has_descender  true if the IPA symbol has a descender
--   with_descender alternative output to use when the preceding base symbol
--                  has a descender
--   is_diacritic   true if the symbol is a diacritic or modifier
local data = {
	["a"] = { "a" },
	["b"] = { "b" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["b\\"] = { "ⱱ" },
	["b_<"] = { "ɓ" },
	["c"] = { "c" },
	["d"] = { "d" },
	["d`"] = { "ɖ", has_descender = true },
	["d_<"] = { "ɗ" },
	-- not in official X-SAMPA; Wikipedia-specific
	["d`_<"] = { "ᶑ", has_descender = true },
	["e"] = { "e" },
	["f"] = { "f" },
	["g"] = { "ɡ", has_descender = true },
	["g_<"] = { "ɠ", has_descender = true },
	["h"] = { "h" },
	["h\\"] = { "ɦ" },
	["i"] = { "i" },
	["j"] = { "j", has_descender = true },
	["j\\"] = { "ʝ", has_descender = true },
	["k"] = { "k" },
	["l"] = { "l" },
	["l`"] = { "ɭ", has_descender = true },
	["l\\"] = { "ɺ" },
	["m"] = { "m" },
	["n"] = { "n" },
	["n`"] = { "ɳ", has_descender = true },
	["o"] = { "o" },
	["p"] = { "p", has_descender = true },
	["p\\"] = { "ɸ", has_descender = true },
	["q"] = { "q", has_descender = true },
	["r"] = { "r" },
	["r`"] = { "ɽ", has_descender = true },
	["r\\"] = { "ɹ" },
	["r\\`"] = { "ɻ", has_descender = true },
	["s"] = { "s" },
	["s`"] = { "ʂ", has_descender = true },
	["s\\"] = { "ɕ" },
	["t"] = { "t" },
	["t`"] = { "ʈ" },
	["u"] = { "u" },
	["v"] = { "v" },
	["v\\"] = { "ʋ" },
	["w"] = { "w" },
	["x"] = { "x" },
	["x\\"] = { "ɧ", has_descender = true },
	["y"] = { "y", has_descender = true },
	["z"] = { "z" },
	["z`"] = { "ʐ", has_descender = true },
	["z\\"] = { "ʑ" },
	["A"] = { "ɑ" },
	["B"] = { "β", has_descender = true },
	["B\\"] = { "ʙ" },
	["C"] = { "ç", has_descender = true },
	["D"] = { "ð" },
	["E"] = { "ɛ" },
	["F"] = { "ɱ", has_descender = true },
	["G"] = { "ɣ", has_descender = true },
	["G\\"] = { "ɢ" },
	["G\\_<"] = { "ʛ" },
	["H"] = { "ɥ", has_descender = true },
	["H\\"] = { "ʜ" },
	["I"] = { "ɪ" },
	["I\\"] = { "ɪ̈" },
	["J"] = { "ɲ", has_descender = true },
	["J\\"] = { "ɟ" },
	["J\\_<"] = { "ʄ", has_descender = true },
	["K"] = { "ɬ" },
	["K\\"] = { "ɮ", has_descender = true },
	["L"] = { "ʎ" },
	["L\\"] = { "ʟ" },
	["M"] = { "ɯ" },
	["M\\"] = { "ɰ", has_descender = true },
	["N"] = { "ŋ", has_descender = true },
	["N\\"] = { "ɴ" },
	["O"] = { "ɔ" },
	["O\\"] = { "ʘ" },
	["P"] = { "ʋ" },
	["Q"] = { "ɒ" },
	["R"] = { "ʁ" },
	["R\\"] = { "ʀ" },
	["S"] = { "ʃ", has_descender = true },
	["T"] = { "θ" },
	["U"] = { "ʊ" },
	["U\\"] = { "ʊ̈" },
	["V"] = { "ʌ" },
	["W"] = { "ʍ" },
	["X"] = { "χ", has_descender = true },
	["X\\"] = { "ħ" },
	["Y"] = { "ʏ" },
	["Z"] = { "ʒ", has_descender = true },
	["."] = { "." },
	["\""] = { "ˈ" },
	["%"] = { "ˌ" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["%\\"] = { "ᴙ" },
	["'"] = { "ʲ", is_diacritic = true },
	[":"] = { "ː", is_diacritic = true },
	[":\\"] = { "ˑ", is_diacritic = true },
	["@"] = { "ə" },
	["@`"] = { "ɚ" },
	["@\\"] = { "ɘ" },
	["{"] = { "æ" },
	["}"] = { "ʉ" },
	["1"] = { "ɨ" },
	["2"] = { "ø" },
	["3"] = { "ɜ" },
	["3`"] = { "ɝ" },
	["3\\"] = { "ɞ" },
	["4"] = { "ɾ" },
	["5"] = { "ɫ" },
	["6"] = { "ɐ" },
	["7"] = { "ɤ" },
	["8"] = { "ɵ" },
	["9"] = { "œ" },
	["&"] = { "ɶ" },
	["?"] = { "ʔ" },
	["?\\"] = { "ʕ" },
	["<\\"] = { "ʢ" },
	[">\\"] = { "ʡ" },
	["^"] = { "ꜛ" },
	["!"] = { "ꜜ" },
	-- not in official X-SAMPA
	["!!"] = { "‼" },
	["!\\"] = { "ǃ" },
	["|"] = { "|", has_descender = true },
	["|\\"] = { "ǀ", has_descender = true },
	["||"] = { "‖", has_descender = true },
	["|\\|\\"] = { "ǁ", has_descender = true },
	["=\\"] = { "ǂ", has_descender = true },
	-- linking mark, liaison
	["-\\"] = { "‿", is_diacritic = true },
	-- coarticulated; not in official X-SAMPA; used by Wiktionary
	["__"] = { U(0x361) },
	-- fortis, strong articulation; not in official X-SAMPA; used by Wiktionary
	["_:"] = { U(0x348) },
	["_\""] = { U(0x308), is_diacritic = true },
	-- advanced
	["_+"] = { U(0x31F), with_descender = "˖", is_diacritic = true },
	-- retracted
	["_-"] = { U(0x320), with_descender = "˗", is_diacritic = true },
	-- rising tone
	["_/"] = { U(0x30C), is_diacritic = true },
	-- voiceless
	["_0"] = { U(0x325), with_descender = U(0x30A), is_diacritic = true },
	-- syllabic
	["="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- syllabic
	["_="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- strident: not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["_%\\"] = { U(0x1DFD) },
	-- ejective
	["_>"] = { "ʼ", is_diacritic = true },
	-- pharyngealized
	["_?\\"] = { "ˤ", is_diacritic = true },
	-- falling tone
	["_\\"] = { U(0x302), is_diacritic = true },
	-- non-syllabic
	["_^"] = { U(0x32F), with_descender = U(0x311), is_diacritic = true },
	-- no audible release
	["_}"] = { U(0x31A), is_diacritic = true },
	-- r-coloring (colouring), rhotacization
	["`"] = { U(0x2DE), is_diacritic = true },
	-- nasalization
	["~"] = { U(0x303), is_diacritic = true },
	-- advanced tongue root
	["_A"] = { U(0x318), is_diacritic = true },
	-- apical
	["_a"] = { U(0x33A), is_diacritic = true },
	-- extra-low tone
	["_B"] = { U(0x30F), is_diacritic = true },
	-- low rising tone
	["_B_L"] = { U(0x1DC5), is_diacritic = true },
	-- less rounded
	["_c"] = { U(0x31C), is_diacritic = true },
	-- dental
	["_d"] = { U(0x32A), is_diacritic = true },
	-- velarized or pharyngealized (dark)
	["_e"] = { U(0x334), is_diacritic = true },
	-- global fall (the original comment here read "downstep").
	-- NOTE(review): this key had been reduced to "" (angle-bracket text was
	-- stripped from the source); "<F>" is the X-SAMPA code for ↘ -- confirm.
	["<F>"] = { "↘" },
	-- falling tone
	["_F"] = { U(0x302), is_diacritic = true },
	-- velarized
	["_G"] = { "ˠ", is_diacritic = true },
	-- high tone
	["_H"] = { U(0x301), is_diacritic = true },
	-- high rising tone
	["_H_T"] = { U(0x1DC4), is_diacritic = true },
	-- aspiration
	["_h"] = { "ʰ", is_diacritic = true },
	-- palatalization
	["_j"] = { "ʲ", is_diacritic = true },
	-- creaky voice, laryngealization, vocal fry
	["_k"] = { U(0x330), is_diacritic = true },
	-- low tone
	["_L"] = { U(0x300), is_diacritic = true },
	-- lateral release
	["_l"] = { "ˡ", is_diacritic = true },
	-- mid tone
	["_M"] = { U(0x304), is_diacritic = true },
	-- laminal
	["_m"] = { U(0x33B), is_diacritic = true },
	-- linguolabial
	["_N"] = { U(0x33C), is_diacritic = true },
	-- nasal release
	["_n"] = { "ⁿ", is_diacritic = true },
	-- more rounded
	["_O"] = { U(0x339), is_diacritic = true },
	-- lowered
	["_o"] = { U(0x31E), with_descender = "˕", is_diacritic = true },
	-- retracted tongue root
	["_q"] = { U(0x319), is_diacritic = true },
	-- global rise
	-- NOTE(review): this key had been reduced to "" (angle-bracket text was
	-- stripped from the source); "<R>" is the X-SAMPA code for ↗ -- confirm.
	["<R>"] = { "↗" },
	-- rising tone
	["_R"] = { U(0x30C), is_diacritic = true },
	-- rising falling tone
	["_R_F"] = { U(0x1DC8), is_diacritic = true },
	-- raised
	["_r"] = { U(0x31D), is_diacritic = true },
	-- extra-high tone
	["_T"] = { U(0x30B), is_diacritic = true },
	-- breathy voice, murmured voice, murmur, whispery voice
	["_t"] = { U(0x324), is_diacritic = true },
	-- voiced
	["_v"] = { U(0x32C), is_diacritic = true },
	-- labialized
	["_w"] = { "ʷ", is_diacritic = true },
	-- extra-short
	["_X"] = { U(0x306), is_diacritic = true },
	-- mid-centralized
	["_x"] = { U(0x33D), is_diacritic = true },
	-- tone letters
	["__T"] = { "˥" },
	["__H"] = { "˦" },
	["__M"] = { "˧" },
	["__L"] = { "˨" },
	["__B"] = { "˩" },
}

--- Convert an X-SAMPA transcription to IPA.
-- At each position the longest matching key of `data` (tried at lengths 4, 3,
-- 2, 1) is replaced by its IPA value; a single character with no entry is
-- copied through unchanged.
-- @param text string: the X-SAMPA transcription
-- @return string: the IPA transcription
local function _XSAMPAtoIPA(text)
	local output = {}
	-- Parallel to `output`: the has_descender / is_diacritic flags of every
	-- symbol emitted so far.
	local characteristics = {}

	while #text > 0 do
		for len = 4, 1, -1 do
			local substring = sub(text, 1, len)
			local result = data[substring]
			local IPA, has_descender, is_diacritic

			if result then
				IPA = result[1]
				has_descender = result.has_descender
				-- Must be the local declared above: assigning to an undeclared
				-- name would create a global and leave the flag unrecorded.
				is_diacritic = result.is_diacritic

				local with_descender = result.with_descender
				if with_descender then
					-- Go backwards through the transcription, skipping any
					-- diacritics, and stop at the start if there is no base
					-- symbol at all.
					local j = #characteristics
					while j > 0 and characteristics[j].is_diacritic do
						j = j - 1
					end
					-- Look at the first non-diacritic symbol before the
					-- current symbol. If it has a descender, use the
					-- descender form of the current symbol.
					if j > 0 and characteristics[j].has_descender then
						IPA = with_descender
					end
				end
			elseif len == 1 then
				-- No entry even for a single character: pass it through.
				IPA = substring
			end

			if IPA then
				-- Drop the consumed prefix. When fewer than `len` characters
				-- remained, the match was the whole text and this yields "".
				text = sub(text, len + 1)
				table.insert(output, IPA)
				table.insert(characteristics, {
					has_descender = has_descender,
					is_diacritic = is_diacritic,
				})
				break
			end
		end
	end

	return table.concat(output)
end

--- Entry point: convert X-SAMPA to IPA.
-- @param frame either a frame object (the first positional argument of the
--   parent frame is preferred, then that of the frame itself) or a plain
--   string holding the X-SAMPA text.
-- @return string: the IPA transcription
-- Raises an error if no text can be found.
function p.X2IPA(frame)
	local text
	if type(frame) == "table" then
		-- getParent must be *called*; its result is checked before indexing
		-- in case there is no parent frame.
		local parent = frame.getParent and frame:getParent()
		text = parent and parent.args and parent.args[1]
			or frame.args and frame.args[1]
	elseif type(frame) == "string" then
		text = frame
	end

	if not text then
		error("No text provided")
	end

	return _XSAMPAtoIPA(text)
end

--- Surround a piece of IPA text with the module's wrapper strings.
-- NOTE(review): the leading wrapper is empty and the trailing one is a single
-- space; this looks like markup that was lost from the source -- confirm.
-- @param text the text to wrap
-- @return the wrapped text
local function _IPAspan(text)
	local before, after = "", " "
	return before .. text .. after
end

--- Build an example fragment showing the IPA conversion of some X-SAMPA text.
-- @param frame either a frame object (the first positional argument of the
--   parent frame is preferred, then that of the frame itself) or a plain
--   string holding the X-SAMPA text.
-- @return string: " ", then "\n| ", then the wrapped IPA conversion
-- Raises an error if no text is provided.
function p.example(frame)
	local args, parentargs
	if type(frame) == "table" then
		args = frame.args
		-- getParent must be *called*; its result is checked before indexing
		-- in case there is no parent frame.
		local parent = frame.getParent and frame:getParent()
		parentargs = parent and parent.args
	end

	local text = parentargs and parentargs[1]
		or args and args[1]
		or type(frame) == "string" and frame
		or error("No text provided")

	-- NOTE(review): the first element is a bare space; it presumably held
	-- markup showing the input text that was lost from the source -- confirm.
	local output = { " " }
	table.insert(output, "\n| ")

	local IPA = _IPAspan(p.X2IPA(text))
	table.insert(output, IPA)

	return table.concat(output)
end

return p