Mòdul:kk-trans

De Viccionari
Salta a la navegació Salta a la cerca
Icona de documentació de mòdul Documentació del mòdul[mostra] [modifica] [refresca]

A continuació es mostra la documentació transclosa de la subpàgina /ús. [salta a la caixa de codi]


Mòdul per a la transcripció automàtica del kazakh segons Viccionari:Romanització del kazakh. És usat per altres mòduls i indirectament per les plantilles {{entrada}}, {{terme}} i {{trad}}.

-- Table of functions exposed by this module.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char
-- Combining diacritics used for stress marks and for Catalan accentuation.
local GR = u(0x0300) -- combining grave accent (U+0300)
local AC = u(0x0301) -- combining acute accent (U+0301)
local DI = u(0x0308) -- combining diaeresis (U+0308)

-- Letter-by-letter transliteration table (Cyrillic -> Latin with diacritics),
-- used by export.translit. Each letter appears in both cases.
-- Capital Һ maps to capital "H" so that letter case is preserved, as for
-- every other capital in the table.
-- NOTE(review): capital Ы and capital І both map to plain "I" while their
-- lowercase forms map to distinct letters (ı / i) — confirm this is intended.
local tab_tlit = {
	["А"]="A",['а']='a',   ["Ә"]="Ä",['ә']='ä',   ["Б"]="B",['б']='b',   ["В"]="V",['в']='v',   ["Г"]="G",['г']='g',
	["Ғ"]="Ğ",['ғ']='ğ',   ["Д"]="D",['д']='d',   ["Е"]="E",['е']='e',   ["Ё"]="Yo",['ё']='yo', ["Ж"]="J",['ж']='j',
	["З"]="Z",['з']='z',   ["И"]="Ï",['и']='ï',   ["Й"]="Y",['й']='y',   ["К"]="K",['к']='k',   ["Қ"]="Q",['қ']='q',
	["Л"]="L",['л']='l',   ["М"]="M",['м']='m',   ["Н"]="N",['н']='n',   ["Ң"]="Ñ",['ң']='ñ',   ["О"]="O",['о']='o',
	["Ө"]="Ö",['ө']='ö',   ["П"]="P",['п']='p',   ["Р"]="R",['р']='r',   ["С"]="S",['с']='s',   ["Т"]="T",['т']='t',
	["У"]="W",['у']='w',   ["Ұ"]="U",['ұ']='u',   ["Ү"]="Ü",['ү']='ü',   ["Ф"]="F",['ф']='f',   ["Х"]="X",['х']='x',
	["Һ"]="H",['һ']='h',   ["Ц"]="C",['ц']='c',   ["Ч"]="Ç",['ч']='ç',   ["Ш"]="Ş",['ш']='ş',   ["Щ"]="Şş",['щ']='şş',
	["Ъ"]="ʺ",['ъ']='ʺ',   ["Ы"]="I",['ы']='ı',   ["І"]="I",['і']='i',   ["Ь"]="ʹ",['ь']='ʹ',   ["Э"]="É",['э']='é',
	["Ю"]="Yw",['ю']='yw', ["Я"]="Ya",['я']='ya'
}

-- Letter-by-letter transcription table (Cyrillic -> Catalan-style spelling),
-- used by wtr. The hard and soft signs map to the empty string, i.e. they
-- are dropped. Each letter appears in both cases.
-- Capital Һ maps to capital "H" so that letter case is preserved, as for
-- every other capital in the table.
local tab_tcrip = {
	["А"]="A",['а']='a',   ["Ә"]="A",['ә']='a',   ["Б"]="B",['б']='b',   ["В"]="V",['в']='v',   ["Г"]="G",['г']='g',
	["Ғ"]="G",['ғ']='g',   ["Д"]="D",['д']='d',   ["Е"]="E",['е']='e',   ["Ё"]="Io",['ё']='io', ["Ж"]="J",['ж']='j',
	["З"]="Z",['з']='z',   ["И"]="I",['и']='i',   ["Й"]="I",['й']='i',   ["К"]="K",['к']='k',   ["Қ"]="Kh",['қ']='kh',
	["Л"]="L",['л']='l',   ["М"]="M",['м']='m',   ["Н"]="N",['н']='n',   ["Ң"]="Ng",['ң']='ng', ["О"]="O",['о']='o',
	["Ө"]="O",['ө']='o',   ["П"]="P",['п']='p',   ["Р"]="R",['р']='r',   ["С"]="S",['с']='s',   ["Т"]="T",['т']='t',
	["У"]="U",['у']='u',   ["Ұ"]="U",['ұ']='u',   ["Ү"]="U",['ү']='u',   ["Ф"]="F",['ф']='f',   ["Х"]="Kh",['х']='kh',
	["Һ"]="H",['һ']='h',   ["Ц"]="Ts",['ц']='ts', ["Ч"]="Tx",['ч']='tx', ["Ш"]="X",['ш']='x',   ["Щ"]="Sx",['щ']='sx',
	["Ъ"]="",['ъ']='',     ["Ы"]="I",['ы']='i',   ["І"]="I",['і']='i',   ["Ь"]="",['ь']='',     ["Э"]="E",['э']='e',
	["Ю"]="Iu",['ю']='iu', ["Я"]="Ia",['я']='ia'
}
	
-- Pattern character class matching the Cyrillic vowel letters (both cases)
-- plus the modifier primes ʹ and ʺ (the symbols tab_tlit emits for the soft
-- and hard signs). wtr uses it to find an е that follows a non-consonant.
-- FIXME! Doesn't work with ɣ, which gets included in this character set
-- NOTE(review): ɣ is not present in the class below; the FIXME may be stale — confirm.
local non_consonants = "[АӘЕЁИОӨҰҮЫІЭЮЯаәеёиоөұүыіэюяʹʺ]"

-- gsub replacement callback that turns Cyrillic Е/е into "Ie"/"ie".
-- Called with one capture (the е itself, at the start of a word) or with two
-- captures (the preceding character and the е); in the latter case the
-- preceding character is returned unchanged in front of the replacement.
local function map_to_je(pre, e)
	local iotated = {["Е"] = "Ie", ["е"] = "ie"}
	if e ~= nil then
		-- two captures: keep the preceding character
		return pre .. iotated[e]
	end
	-- single capture: `pre` is the е itself
	return iotated[pre]
end

-- Transcribes a single word from Cyrillic into Catalan-style spelling.
-- The word may carry a combining stress mark (grave or acute); the mark is
-- then kept or dropped following the Catalan written-accent rules coded below.
-- @param cyr  the word in Cyrillic script (string)
-- @return the transcribed word (string)
local function wtr(cyr)
	-- treat a grave stress mark like an acute one
	cyr = mw.ustring.gsub(cyr, GR, AC)
	
	-- reduce doubled consonants that are not used in Catalan
	local no_dobles = {"([Вв])в", "([Жж])ж", "([Кк])к", "([Ққ])қ", "([Ңң])ң", "([Хх])х", "([Һһ])һ", "([Цц])ц", "([Чч])ч", "([Шш])ш", "([Щщ])щ"}
	for i = 1, #no_dobles do
		cyr = mw.ustring.gsub(cyr, no_dobles[i], "%1")
	end
	
	-- е after a vowel or at the beginning of a word becomes ie
	cyr = mw.ustring.gsub(cyr, "^([Ее])", map_to_je)
	cyr = mw.ustring.gsub(cyr, "(" .. non_consonants .. ")([Ее])", map_to_je)
	-- need to do it twice in case of sequences of such vowels
	cyr = mw.ustring.gsub(cyr, "(" .. non_consonants .. ")([Ее])", map_to_je)
	
	-- letter-by-letter conversion; characters with no table entry are kept as they are
	local latin = mw.ustring.gsub(cyr, '.', tab_tcrip)
	
	-- simplify a double i (keeping the stress mark if the first i carries it)
	latin = mw.ustring.gsub(latin, "(i" .. AC .. "?)i", "%1")
	
	-- simplify ngg to ng
	latin = mw.ustring.gsub(latin, "ngg", "ng")
	
	-- g before e/i is spelled gue/gui
	latin = mw.ustring.gsub(latin, "([Gg])([ei])", "%1u%2")

	-- Catalan accentuation rules
	local char_acc = {["A"..AC]="À", ["E"..AC]="É", ["I"..AC]="Í", ["O"..AC]="Ó", ["U"..AC]="Ú", 
		["a"..AC]="à", ["e"..AC]="é", ["i"..AC]="í", ["i"..DI]="ï", ["o"..AC]="ó", ["u"..AC]="ú", ["u"..DI]="ü"}
	-- Syllabify a copy with precomposed accented vowels. gsub returns
	-- (string, count); keep only the string so the count is not forwarded to
	-- sil() as a spurious second argument.
	local precomposed = mw.ustring.gsub(latin, "." .. AC, char_acc)
	local sil = mw.text.split(require("Mòdul:ca-general").sil(precomposed), '·')
	
	if #sil == 1 then -- monosyllable: written without accent
		latin = mw.ustring.gsub(latin, AC, "")
	elseif mw.ustring.find(sil[#sil], "[ÀàÉéÍíÓóÚú]") then -- stress on the last syllable
		-- the accent is kept only when the word ends in a stressed vowel (+ s) or in stressed e/i + n
		if not (mw.ustring.find(latin, "[aeiou]" .. AC .. "s?$") or mw.ustring.find(latin, "[ei]" .. AC .. "n$")) then
			-- a stressed i/u right after a vowel takes a diaeresis instead of the accent
			latin = mw.ustring.gsub(latin, "([aeoiu][iu])" .. AC, "%1" .. DI)
			-- ...except in "gui", which gets no diaeresis
			latin = mw.ustring.gsub(latin, "gui" .. DI, "gui")
			latin = mw.ustring.gsub(latin, AC, "")
		end
	elseif mw.ustring.find(sil[#sil-1], "[ÀàÉéÍíÓóÚú]") then -- stress on the penultimate syllable
		-- the accent is dropped when the word ends in vowel (+ s) or e/i + n ...
		-- (byte-wise string.find is sufficient here: the patterns are pure ASCII)
		if string.find(latin, "[aeiou]s?$") or string.find(latin, "[ei]n$") then
			-- ... unless the word ends in vowel + i/u
			if not string.find(latin, "[aeiou][iu]$") then
				latin = mw.ustring.gsub(latin, "([aeoiu][iu])" .. AC, "%1" .. DI)
				latin = mw.ustring.gsub(latin, "gui" .. DI, "gui")
				latin = mw.ustring.gsub(latin, AC, "")
			end
		end
	end
	-- compose the remaining letter + diacritic pairs into single letters
	-- (a + acute becomes the open accent à)
	latin = mw.ustring.gsub(latin, ".[" .. AC .. DI .. "]", char_acc)
	
	-- intervocalic s is written ss. Matches cannot overlap (the closing vowel
	-- of one match is consumed), so a second pass is needed for sequences such
	-- as vowel-s-vowel-s-vowel; an already doubled ss never matches again.
	for _ = 1, 2 do
		latin = mw.ustring.gsub(latin, "([AEIOUaeiouÀÉÍÓÚàéíóúü])s([aeiouàéíóú])", "%1ss%2")
	end
	-- x after a, e, o, u is written ix
	latin = mw.ustring.gsub(latin, "([AEOUaeouÀÉÓÚàéóúü])x", "%1ix")
	
	return latin
end

-- Transliterates a word or phrase letter by letter using tab_tlit.
-- Accepts either a plain string or a table with an `args` field (e.g. a
-- frame), in which case the first argument is used. Characters that have no
-- entry in tab_tlit are left unchanged.
function export.translit(text)
	local source = text
	if type(source) == "table" then
		source = source.args[1]
	end
	-- keep only the string result of gsub, dropping the substitution count
	local result = mw.ustring.gsub(source, '.', tab_tlit)
	return result
end

-- Transcribes a word or phrase: the text is split at every whitespace
-- character, each piece is passed through wtr, and the results are joined
-- with single spaces.
-- Accepts either a plain string or a table with an `args` field (e.g. a
-- frame), in which case the first argument is used.
function export.transcrip(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local pieces = {}
	for token in mw.text.gsplit(text, '%s') do
		pieces[#pieces + 1] = wtr(token)
	end

	return table.concat(pieces, ' ')
end

-- Returns the combined romanization of `text`: "transliteration/transcription",
-- or just one of them when both results are identical.
function export.tr(text)
	local literal = export.translit(text)
	local phonetic = export.transcrip(text)

	if literal ~= phonetic then
		return literal .. "/" .. phonetic
	end
	return phonetic
end

-- Module interface: translit, transcrip and tr.
return export