Mòdul:ky-trans

De Viccionari
Icona de documentació de mòdul Documentació del mòdul[mostra] [modifica] [refresca]

A continuació es mostra la documentació transclosa de la subpàgina /ús. [salta a la caixa de codi]


Mòdul per a la transcripció automàtica del kirguís segons w:Viquipèdia:Transcripció del kirguís. És usat per altres mòduls i indirectament per les plantilles {{entrada}}, {{m}} i {{trad}}.

-- Module table; the exported functions are attached to it and it is returned at the end of the file.
local p = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char
-- Combining diacritics used as stress / hiatus marks during transliteration.
local GR = u(0x0300) -- combining grave accent (U+0300)
local AC = u(0x0301) -- combining acute accent (U+0301)
local DI = u(0x0308) -- combining diaeresis (U+0308)

-- Letter-by-letter mapping from Kyrgyz Cyrillic to the Latin transcription.
-- It is applied to every character of a word; characters without an entry
-- are left unchanged by the substitution that uses this table.
-- Notes visible in the data: the hard and soft signs (Ъ, Ь) map to the empty
-- string, О and Ө both map to "o", and У, Ү and Ы all map to "u".
local tab_tcrip = {
	["А"]="A",['а']='a',   ["Б"]="B",['б']='b',   ["В"]="V",['в']='v',   ["Г"]="G",['г']='g', 
	["Д"]="D",['д']='d',   ["Е"]="E",['е']='e',   ["Ё"]="Io",['ё']='io', ["Ж"]="Dj",['ж']='dj', 
	["З"]="Z",['з']='z',   ["И"]="I",['и']='i',   ["Й"]="I",['й']='i',   ["К"]="K",['к']='k',
	["Л"]="L",['л']='l',   ["М"]="M",['м']='m',   ["Н"]="N",['н']='n',   ["Ң"]="Ng",['ң']='ng', ["О"]="O",['о']='o', 
	["Ө"]="O",['ө']='o',   ["П"]="P",['п']='p',   ["Р"]="R",['р']='r',   ["С"]="S",['с']='s',   ["Т"]="T",['т']='t', 
	["У"]="U",['у']='u',   ["Ү"]="U",['ү']='u',   ["Ф"]="F",['ф']='f',   ["Х"]="Kh",['х']='kh',
	["Ц"]="Ts",['ц']='ts', ["Ч"]="Tx",['ч']='tx', ["Ш"]="X",['ш']='x',   ["Щ"]="Sx",['щ']='sx',
	["Ъ"]="",['ъ']='',     ["Ы"]="U",['ы']='u',   ["Ь"]="",['ь']='',     ["Э"]="E",['э']='e',
	["Ю"]="Iu",['ю']='iu', ["Я"]="Ia",['я']='ia'
}

-- Character class (ustring pattern set) of the letters after which Е/е is
-- transcribed as "ie" instead of "e": Cyrillic vowel letters plus the
-- modifier primes ʹ and ʺ.
-- NOTE(review): У/у is not in this set although Ү/ү is, so "е" after "у" stays
-- "e". Confirm against the transcription rules whether this is intentional.
local non_consonants = "[АЕЁИОӨҮЫЭЮЯаеёиоөүыэюяʹʺ]"

-- gsub replacement callback that turns Е/е into Ie/ie.
-- Called with one capture (the Е/е itself) it returns just the replacement;
-- called with two captures (preceding character, Е/е) it returns the
-- preceding character followed by the replacement.
local function map_to_je(pre, e)
	local iotated = {["Е"] = "Ie", ["е"] = "ie"}
	if e ~= nil then
		return pre .. iotated[e]
	end
	return iotated[pre]
end

-- Transliterates a single Kyrgyz (Cyrillic) word into the Catalan-style Latin
-- transcription. Stress marks (combining grave or acute) present in the input
-- survive the letter mapping and are then either written as a Catalan accent,
-- turned into a diaeresis, or dropped, following the accentuation rules below.
-- @param cyr string: one word (no whitespace), optionally with stress marks
-- @return string: the transliterated word
local function wtr(cyr)
	-- a grave stress mark is handled exactly like an acute one
	cyr = mw.ustring.gsub(cyr, GR, AC)
	
	-- collapse doubled consonants that are not written double in Catalan
	local no_dobles = {"([Вв])в", "([Жж])ж", "([Кк])к", "([Ққ])қ", "([Ңң])ң", "([Хх])х", "([Һһ])һ", "([Цц])ц", "([Чч])ч", "([Шш])ш", "([Щщ])щ"}
	for i = 1, #no_dobles do
		cyr = mw.ustring.gsub(cyr, no_dobles[i], "%1")
	end
	
	-- е after a vowel or at the beginning of a word becomes ie
	cyr = mw.ustring.gsub(cyr, "^([Ее])", map_to_je)
	cyr = mw.ustring.gsub(cyr, "(" .. non_consonants .. ")([Ее])", map_to_je)
	-- need to do it twice in case of sequences of such vowels
	cyr = mw.ustring.gsub(cyr, "(" .. non_consonants .. ")([Ее])", map_to_je)
	
	-- map every character through the table; unmapped characters
	-- (including the combining stress mark) are kept as they are
	local latin = mw.ustring.gsub(cyr, '.', tab_tcrip)
	
	-- simplify two consecutive i into one, except when the second one is stressed (hiatus)
	latin = mw.ustring.gsub(latin, "(i" .. AC .. "?)i$", "%1")
	latin = mw.ustring.gsub(latin, "(i" .. AC .. "?)i([^" .. AC .. "])", "%1%2")
	
	-- geminated l is written l·l
	latin = mw.ustring.gsub(latin, "ll", "l·l")
	
	-- simplify ngg to ng
	latin = mw.ustring.gsub(latin, "ngg", "ng")
	
	-- g before e/i is written gue/gui
	latin = mw.ustring.gsub(latin, "([Gg])([ei])", "%1u%2")
	
	-- Catalan accentuation rules
	-- Precomposed letters for base letter + combining mark (a + acute gives the open accent à).
	local char_acc = {["A"..AC]="À", ["E"..AC]="É", ["I"..AC]="Í", ["O"..AC]="Ó", ["U"..AC]="Ú", 
		["a"..AC]="à", ["e"..AC]="é", ["i"..AC]="í", ["i"..DI]="ï", ["o"..AC]="ó", ["u"..AC]="ú", ["u"..DI]="ü"}
	-- gsub returns (string, count); keep only the string so the match count
	-- is not forwarded to sil() as an unintended extra argument
	local accented = mw.ustring.gsub(latin, "." .. AC, char_acc)
	-- syllables of the word, as split by the Catalan general module
	local sil = mw.text.split(require("Mòdul:ca-general").sil(accented), '·')
	if #sil == 1 then -- monosyllable: written without accent
		latin = mw.ustring.gsub(latin, AC, "")
	elseif mw.ustring.find(sil[#sil], "[ÀàÉéÍíÓóÚú]") then -- stress on the last syllable
		-- the accent is only kept when the word ends in stressed vowel (+ s) or stressed e/i + n
		if not (mw.ustring.find(latin, "[aeiou]" .. AC .. "s?$") or mw.ustring.find(latin, "[ei]" .. AC .. "n$")) then
			if not mw.ustring.find(cyr, "ю" .. AC) then -- hiatus except diphthong iu
				-- stressed i/u after a vowel gets a diaeresis instead of the accent
				latin = mw.ustring.gsub(latin, "([aeoiu][iu])" .. AC, "%1" .. DI)
			end
			-- no diaeresis on the i of gui
			latin = mw.ustring.gsub(latin, "gui" .. DI, "gui")
			latin = mw.ustring.gsub(latin, AC, "")
		end
	elseif mw.ustring.find(sil[#sil-1], "[ÀàÉéÍíÓóÚú]") then -- stress on the penultimate syllable
		-- the accent is dropped when the word ends in vowel (+ s) or e/i + n,
		-- unless it ends in vowel + i/u
		-- (byte-level string.find is sufficient here: the patterns are ASCII-only)
		if string.find(latin, "[aeiou]s?$") or string.find(latin, "[ei]n$") then
			if not string.find(latin, "[aeiou][iu]$") then
				latin = mw.ustring.gsub(latin, "([aeoiu][iu])" .. AC, "%1" .. DI)
				latin = mw.ustring.gsub(latin, "gui" .. DI, "gui")
				latin = mw.ustring.gsub(latin, AC, "")
			end
		end
	end
	-- compose the remaining base letter + combining mark pairs (acute on a becomes the open accent à)
	latin = mw.ustring.gsub(latin, ".[" .. AC .. DI .. "]", char_acc)
	
	-- intervocalic s is written ss
	local ss_pattern = "([AEIOUaeiouÀÉÍÓÚàéíóúü])s([aeiouàéíóú])"
	latin = mw.ustring.gsub(latin, ss_pattern, "%1ss%2")
	-- second pass for overlapping matches: the vowel closing one match cannot
	-- open the next one within a single gsub (e.g. "asasa"); an "s" that was
	-- already doubled no longer matches, so this pass never over-doubles
	latin = mw.ustring.gsub(latin, ss_pattern, "%1ss%2")
	-- x after a, e, o, u is written ix
	latin = mw.ustring.gsub(latin, "([AEOUaeouÀÉÓÚàéóúü])x", "%1ix")
	
	return latin
end

-- Transliterates a text, either a single word or a phrase.
-- Accepts a plain string, or a frame-like table whose first positional
-- argument (args[1]) holds the text. The text is split at every whitespace
-- character, each piece is transliterated on its own, and the results are
-- joined back with single spaces.
function p.tr(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	
	local pieces = {}
	for chunk in mw.text.gsplit(text, '%s') do
		pieces[#pieces + 1] = wtr(chunk)
	end
	
	return table.concat(pieces, ' ')
end

return p