Mòdul:es-pron/AFI

Documentació del mòdul
A continuació es mostra la documentació transclosa de la subpàgina /ús. [salta a la caixa de codi]
Mòdul que genera la pronúncia en castellà. La sortida és formatada pel Mòdul:es-pron.
-- Codes of the geolects for which a transcription is produced.
-- The list is walked in this order when the per-geolect data is initialised.
local geolects = {
	-- Northern Peninsular (Madrid, Barcelona)
	"ps",
	-- Southern Peninsular (Sevilla)
	"pm",
	-- Highland Americas (México DF, Bogotá, Andes)
	"aa",
	-- Lowland Americas (Caracas, Antilles)
	"ab",
	-- Rioplatense, a variant of "ab" (Buenos Aires, Montevideo)
	"rp",
}

local m_general = require("Module:es-general")

-- Names of the phonological features, in the order in which they are applied.
-- A name without a matching entry in `feature_functions` is skipped.
local features = {
	-- θ > s
	"seseo",
	-- ʎ > j
	"yeísmo",

	-- Stops
	-- V + b,d,g > β,ð,ɣ
	"espirantización restringida de oclusivas sonoras",
	-- b,d,g > p,t,k in coda
	"ensordecimiento de oclusivas sonoras",
	-- d·l > tl
	"grupo tl",
	-- ks > s
	"grupo x más consonante",
	-- final d, -ado
	"debilitamiento de oclusiva dental sonora",
	-- t͡ʃ > ʃ
	"desafricazión de la africada alveopalatal",

	-- Nasals
	-- n > m,ɱ,ŋ
	"asimilación de lugar de nasales",
	-- n(s) in coda > ŋ
	"velarización nasal",

	-- Fricatives
	-- s,θ,f,x > z,z,v,ɣ in coda before a voiced consonant
	"sonorización de fricativas",
	-- s,θ,f,x > h
	"desbucalización de fricativas en coda",
	"desbucalización de fricativas finales",
	-- x > h
	"desbucalización x",

	-- Rhotics
	-- ɾ in coda > z
	"asibilición de vibrante simple",
	-- /tɾ/ > [t͡s], /dr/ > [d͡z]
	"africación de oclusiva dental más vibrante",

	-- Consonant clusters
	"debilitamiento de grupos consonánticos",

	-- Semiconsonants
	-- j > ʝ
	"fricativización de j",
	-- j > ʒ
	"asibilación de j",
	-- w > ɣw
	"consonante w",

	-- Vowels
	-- unstressed (e/i/o/u/a), devoiced
	"vocales caedizas",
}

-- Applies one rewrite rule to the transcriptions of the given geolects.
--   data: table mapping geolect code -> transcription string (modified in place)
--   geo:  sequence of geolect codes to rewrite
--   rule: either a function receiving the whole string and returning its
--         replacement, or a table {from = pattern, to = replacement}
-- Returns the same `data` table.
local function apply_rule(data, geo, rule)
	local function rewrite(text)
		if type(rule) == "function" then
			-- ".+" hands the whole (non-empty) string to the rule function
			return (mw.ustring.gsub(text, ".+", rule))
		end
		return (mw.ustring.gsub(text, rule.from, rule.to))
	end

	for _, code in ipairs(geo) do
		data[code] = rewrite(data[code])
	end
	return data
end

-- Registry keyed by feature name (the strings listed in `features`); each
-- entry is a table whose `func` field takes and returns the per-geolect data.
local feature_functions = {}

-- Seseo: θ is rewritten as s in the American geolects (aa, ab, rp).
feature_functions["seseo"] = {func = function(data)
	return apply_rule(data, {"aa", "ab", "rp"}, {from = "θ", to = "s"})
end
}

-- Yeísmo: ʎ is rewritten as the working symbol "y" everywhere except ps.
-- The "y" symbol is turned into ʝ or ʒ by the later j features.
feature_functions["yeísmo"] = {func = function(data)
	return apply_rule(data, {"pm", "aa", "ab", "rp"}, {from = "ʎ", to = "y"})
end
}

-- Restricted spirantization of voiced stops.
--   aa: β, ð, ɣ preceded by a vowel (optionally across a syllable break "·")
--       are rewritten as b, d, ɡ.
--   rp: a word-final ð is rewritten as d.
-- NOTE(review): the feature list describes this feature as
-- "V + b,d,g > β,ð,ɣ", which is the opposite direction to what the aa rule
-- does here (stops *after* vowels) — confirm the intended behaviour.
feature_functions["espirantización restringida de oclusivas sonoras"] = {func = function(data)
	local after_vowel = "([aáeéiíoóuú]·?)"
	local stop_for = {
		{"β", "b"},
		{"ð", "d"},
		{"ɣ", "ɡ"},
	}
	local function to_stops(word)
		for _, pair in ipairs(stop_for) do
			word = mw.ustring.gsub(word, after_vowel .. pair[1], "%1" .. pair[2])
		end
		return word
	end
	data = apply_rule(data, {"aa"}, to_stops)

	-- final -d
	return apply_rule(data, {"rp"}, {from = "ð$", to = "d"})
end
}

-- feature_functions["ensordecimiento de oclusivas sonoras"]

-- Cluster "tl": ð in coda followed by l in the next syllable becomes the
-- onset cluster tl (syllable break moved before it) in aa, ab and rp.
feature_functions["grupo tl"] = {func = function(data)
	return apply_rule(data, {"aa", "ab", "rp"}, {from = "ð·l", to = "·tl"})
end
}

-- Letter x before a consonant: syllable-final ks is reduced to s in pm.
feature_functions["grupo x más consonante"] = {func = function(data)
	return apply_rule(data, {"pm"}, {from = "ks·", to = "s·"})
end
}

-- Weakening of the voiced dental: marks ð as optional, "(ð)", when it is
-- word-final and in the endings -ado / -ados.
-- Original note: strong weakening in -d and -ado; elision is colloquial, and
-- vulgar in other intervocalic positions.
feature_functions["debilitamiento de oclusiva dental sonora"] = {func = function(data)
	local function mark_weak_d(word)
		local marked = mw.ustring.gsub(word, "ð$", "(ð)")
		marked = mw.ustring.gsub(marked, "([aá])·ð(os?)$", "%1·(ð)%2")
		return marked
	end
	-- applied to pm only; partially valid for "ab" and "aa" (not Mexico)
	return apply_rule(data, {"pm"}, mark_weak_d)
end
}

-- feature_functions["desafricazión de la africada alveopalatal"]

-- Place assimilation of n to the following consonant (ps, pm, aa, rp):
-- ɱ before f, m before b/p, ŋ before k/ɡ/x, ɲ before y.
feature_functions["asimilación de lugar de nasales"] = {func = function(data)
	-- applied in this order
	local assimilations = {
		{"n·f", "ɱ·f"},
		{"n(·[bp])", "m%1"},
		{"n(·?[kɡx])", "ŋ%1"},
		{"n·y", "ɲ·y"},
	}
	local function assimilate(word)
		for _, step in ipairs(assimilations) do
			word = mw.ustring.gsub(word, step[1], step[2])
		end
		return word
	end
	return apply_rule(data, {"ps", "pm", "aa", "rp"}, assimilate)
end
}

-- Nasal velarization (pm, ab): n, optionally followed by s, becomes ŋ at the
-- end of the word and at the end of a syllable.
feature_functions["velarización nasal"] = {func = function(data)
	local function velarize(word)
		local result = mw.ustring.gsub(word, "n(s?)$", "ŋ%1") -- word-final
		result = mw.ustring.gsub(result, "n(s?·)", "ŋ%1") -- syllable-final
		return result
	end
	return apply_rule(data, {"pm", "ab"}, velarize)
end
}

-- Voicing of syllable-final fricatives before a voiced consonant (ps, aa):
-- s/θ > z, f > v, x > ɣ.
feature_functions["sonorización de fricativas"] = {func = function(data)
	-- syllable break followed by a voiced consonant
	local voiced_onset = "(·[βðɣmnɲlʎr])"
	local voicing = {
		{"[sθ]", "z"},
		{"f", "v"},
		{"x", "ɣ"},
	}
	local function voice(word)
		for _, pair in ipairs(voicing) do
			word = mw.ustring.gsub(word, pair[1] .. voiced_onset, pair[2] .. "%1")
		end
		return word
	end
	return apply_rule(data, {"ps", "aa"}, voice)
end
}

-- Debuccalization in coda (pm, ab, rp): s, θ, f, x before a syllable break
-- become h.
feature_functions["desbucalización de fricativas en coda"] = {func = function(data)
	return apply_rule(data, {"pm", "ab", "rp"}, {from = "[sθfx]·", to = "h·"})
end
}

-- Debuccalization at the end of the word (pm, ab): final s, θ, f, x become h.
feature_functions["desbucalización de fricativas finales"] = {func = function(data)
	return apply_rule(data, {"pm", "ab"}, {from = "[sθfx]$", to = "h"})
end
}

-- Debuccalization of x (pm, ab): every x becomes h.
feature_functions["desbucalización x"] = {func = function(data)
	return apply_rule(data, {"pm", "ab"}, {from = "x", to = "h"})
end
}

-- feature_functions["asibilición de vibrante simple"]

-- Affrication of dental + tap (aa): dɾ and ðɾ become d͡z, tɾ becomes t͡s.
-- The replacement is looked up in a table keyed by the captured cluster.
feature_functions["africación de oclusiva dental más vibrante"] = {func = function(data)
	local affricate_for = {
		["dɾ"] = "d͡z",
		["ðɾ"] = "d͡z",
		["tɾ"] = "t͡s",
	}
	return apply_rule(data, {"aa"}, {from = "([dðt]ɾ)", to = affricate_for})
end
}

-- Weakening of consonant clusters (ps, pm, ab, rp): the weak member of the
-- cluster is wrapped in parentheses. The steps run in the listed order, each
-- one over all four geolects before the next starts.
feature_functions["debilitamiento de grupos consonánticos"] = {func = function(data)
	local geo = {"ps", "pm", "ab", "rp"}
	local steps = {
		-- groups bs/ds/ns + C
		{from = "([βðnŋ])([sh])·", to = "(%1)%2·"},
		-- groups ls/rs + C
		{from = "([lɾ])([sh])·", to = "%1(%2)·"},
		-- final groups s/n + C
		{from = "([snŋ])([^aáeéiíoóuú])$", to = "%1(%2)"},
		-- final groups C + s
		{from = "([^aáeéiíoóuú])([shzθ])$", to = "(%1)%2"},
	}
	for _, step in ipairs(steps) do
		data = apply_rule(data, geo, step)
	end
	return data
end
}

-- Fricativization: the working symbol "y" becomes ʝ in ps, pm, aa and ab.
feature_functions["fricativización de j"] = {func = function(data)
	return apply_rule(data, {"ps", "pm", "aa", "ab"}, {from = "y", to = "ʝ"})
end
}

-- Assibilation: the working symbol "y" becomes ʒ in rp.
feature_functions["asibilación de j"] = {func = function(data)
	return apply_rule(data, {"rp"}, {from = "y", to = "ʒ"})
end
}

-- Consonantal w (pm, ab, rp): an optional ɣ, written "(ɣ)", is inserted
-- before w at the start of the word and after a vowel-final syllable.
feature_functions["consonante w"] = {func = function(data)
	local function strengthen_w(word)
		local result = mw.ustring.gsub(word, "^w", "(ɣ)w")
		result = mw.ustring.gsub(result, "([aáeéiíoóuú]·)w", "%1(ɣ)w")
		return result
	end
	return apply_rule(data, {"pm", "ab", "rp"}, strengthen_w)
end
}

-- Weak ("falling") vowels in aa: weakening, devoicing or elision of an
-- unstressed vowel next to voiceless consonants; the affected vowel group is
-- wrapped in parentheses. Diphthongs and an r inside a consonant cluster are
-- included in the group.
-- Only one place is marked per word: the last one in the word that fits the
-- first applicable context (most likely near the end of the word).
feature_functions["vocales caedizas"] = {func = function(data)
	-- Returns `text` with its characters in reverse order.
	local function reversed(text)
		local chars = {}
		for i = mw.ustring.len(text), 1, -1 do
			chars[#chars + 1] = mw.ustring.sub(text, i, i)
		end
		return table.concat(chars)
	end

	-- Contexts in decreasing order of probability. All patterns are written
	-- for the REVERSED word, so that replacing the first match marks the last
	-- occurrence in the real word; that is also why ")" precedes "(".
	local contexts = {
		{ -- sVs
			probe = "s·?[iu]?[eioua][jw]?s",
			capture = "(s·?)([iu]?[eioua][jw]?)s",
			mark = "%1)%2(s",
		},
		{ -- [voiceless]V[voiceless]
			probe = "[tkʃxhs]·?[eioua]ɾ?[tkʃxhs]",
			capture = "([tkʃxhs]·?)([eioua]ɾ?)([tkʃxhs])",
			mark = "%1)%2(%3",
		},
		{ -- .Vs
			probe = "s·?[iu]?[eioua][jwɾ]?.",
			capture = "(s·?)([iu]?[eioua][jwɾ]?)(.)",
			mark = "%1)%2(%3",
		},
		{ -- [voiceless]V
			probe = "[eioua]ɾ?[ptkfʃxhs]",
			capture = "([iu]?[eioua]ɾ?)([ptkfʃxhs])",
			mark = ")%1(%2",
		},
	}

	local function mark_weak_vowel(word)
		local mirrored = reversed(word)
		for _, context in ipairs(contexts) do
			if mw.ustring.find(mirrored, context.probe) then
				mirrored = mw.ustring.gsub(mirrored, context.capture, context.mark, 1)
				break -- only the first applicable context is used
			end
		end
		return reversed(mirrored)
	end

	return apply_rule(data, {"aa"}, mark_weak_vowel)
end
}

-- Converts a syllabified, stress-marked spelling (syllables separated by "·",
-- stressed vowel carrying an acute accent) into the base transcription shared
-- by all geolects. Accented vowels and "·" are kept for the later stages.
-- The rewrites are strictly ordered: later rules rely on the output of
-- earlier ones (e.g. "x" is first the letter, later the velar fricative).
local function to_IPA(word)
	local rewrites = {
		-- digraphs
		{"ch", "tʃ"},
		{"ll", "ʎ"},
		{"^mn", "n"},
		{"^p([st])", "%1"},
		{"^gn", "n"},
		{"q[uú]?", "k"},
		{"st·", "s·"},
		-- letter y
		{"^hié", "yé"},
		{"^i([aáoó])", "y%1"},
		{"y$", "i"},
		{"y([^aáeéiíoóuú])", "i%1"},
		-- consonants: single-letter defaults, refined by the rules below
		{".", {["b"] = "β", ["d"] = "ð", ["g"] = "ɣ", ["h"] = "",
			["ñ"] = "ɲ", ["r"] = "ɾ", ["v"] = "β", ["z"] = "θ"}},
		{"^β", "b"},
		{"([mn]·)β", "%1b"},
		{"c([eéií])", "θ%1"},
		{"c", "k"},
		{"^ð", "d"},
		{"([mnl]·)ð", "%1d"},
		{"^x", "s"},
		{"(·?)x", "k%1s"},
		{"ɣ([eéií])", "x%1"},
		{"ɣu([eéií])", "ɣ%1"},
		{"^ɣ", "ɡ"},
		{"([mn]·)ɣ", "%1ɡ"},
		{"j", "x"},
		{"^ɾ", "r"},
		{"([βlnst]·)ɾ", "%1r"},
		{"ɾɾ", "r"},
		{"ɾ·ɾ", "ɾ·r"},
		{"t·", "ð·"},
		{"t$", "ð"},
		{"tʃ", "t͡ʃ"},
		-- vowels
		{"ü", "w"},
		{"i([aáeéoóuú])", "j%1"},
		{"u([aáeéoóií])", "w%1"},
	}

	for _, rewrite in ipairs(rewrites) do
		word = mw.ustring.gsub(word, rewrite[1], rewrite[2])
	end
	return word
end

-- Makes the stressed syllable explicit by putting an acute accent on its
-- vowel when the spelling carries none. Every word leaves this function with
-- a marked stress, which later stages use to tell unstressed vowels apart.
--   word: lowercase spelling with syllables separated by "·"
-- Returns the word with the accent added.
-- Rules for unaccented words: ending in n, s or a vowel -> stress on the
-- penultimate syllable (or on the only one); any other ending -> stress on
-- the last syllable. A leading i/u of a vowel group is treated as a glide,
-- so the accent goes on the vowel that follows it.
-- Words ending in "·men·te" are treated as base + "-mente": the base is
-- accented on its own and the suffix is re-attached with its own stress.
-- NOTE(review): this also applies to words that merely end in those
-- syllables; verify whether such cases need an exception.
local function accent_mark(word)
	local mente = mw.ustring.find(word, "·men·te$")
	if mente then
		word = mw.ustring.sub(word, 1, -8) -- drop the 7 characters of "·men·te"
	end

	if not mw.ustring.find(word, "[áéíóú]") then
		local accented = {["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú",
			["ia"] = "iá", ["ie"] = "ié", ["io"] = "ió", ["iu"] = "iú",
			["ua"] = "uá", ["ue"] = "ué", ["ui"] = "uí", ["uo"] = "uó"}
		local vowel_group = "[iu]?[aeiou]"

		if mw.ustring.find(word, "[nsaeiou]$") then
			-- penultimate syllable; ustring so that %l also covers ñ
			local stressed_syl = mw.ustring.match(word, "(%l-)·%l-$")
			if stressed_syl then
				local accented_syl = mw.ustring.gsub(stressed_syl, vowel_group, accented, 1)
				word = mw.ustring.gsub(word, stressed_syl .. "(·%l-)$", accented_syl .. "%1")
			else
				word = mw.ustring.gsub(word, vowel_group, accented, 1) -- monosyllable
			end
		else
			-- last syllable; ustring here too, for consistency with the
			-- branch above (byte-based string.match does not treat ñ as %l)
			local stressed_syl = mw.ustring.match(word, "(%l-)$")
			if stressed_syl then
				local accented_syl = mw.ustring.gsub(stressed_syl, vowel_group, accented, 1)
				word = mw.ustring.gsub(word, stressed_syl .. "$", accented_syl)
			end
		end
	end

	if mente then
		-- Use the same "·" separator as the rest of the word, so that rules
		-- keyed on syllable boundaries also see the suffix.
		word = word .. "·mén·te"
	end
	return word
end

-- Turns the working transcription into its final form:
--   * the syllable holding an accented vowel gets a leading stress mark "ˈ"
--     (replacing its "·" when it is not the first syllable);
--   * acute accents are removed from the vowels;
--   * when there is more than one stress mark, the first becomes secondary "ˌ";
--   * remaining "·" separators become ".";
--   * the result is wrapped in "/" or, when it contains any of β ð ɣ ɱ ŋ ʒ,
--     in "\" (phonemic notation extended with relevant allophones).
local function IPA_separators(word)
	local unaccented = {["á"] = "a", ["é"] = "e", ["í"] = "i", ["ó"] = "o", ["ú"] = "u"}

	local text = mw.ustring.gsub(word, "^([^·]*[áéíóú])", "ˈ%1")
	text = mw.ustring.gsub(text, "·([^·]*[áéíóú])", "ˈ%1")
	text = mw.ustring.gsub(text, ".", unaccented)
	if mw.ustring.match(text, "ˈ.+ˈ") then
		text = mw.ustring.gsub(text, "ˈ", "ˌ", 1)
	end
	text = mw.ustring.gsub(text, "·", ".")

	local delimiter = "/" -- phonemic
	if mw.ustring.find(text, "[βðɣɱŋʒ]") then
		delimiter = "\\" -- phonemic extended with relevant allophones
	end
	return delimiter .. text .. delimiter
end

-- Builds the transcription of a word for every geolect.
--   word: spelling to transcribe; when nil or empty, the title of the
--         current page is used instead.
-- Returns a table mapping each geolect code in `geolects` to its delimited
-- transcription string.
local function show(word)
	-- A missing argument is treated like an empty one instead of failing
	-- later in mw.ustring.lower.
	if word == nil or word == "" then
		word = mw.title.getCurrentTitle().text
	end

	-- syllabification
	word = m_general.sil(mw.ustring.lower(word)) -- separated with mid point ·
	word = mw.ustring.gsub(word, "([nd])·h", "·%1") -- pronounced as onset not coda
	word = accent_mark(word) -- add accent mark in stressed syllable

	word = to_IPA(word) -- general standard, no seseo/ceceo, no yeísmo

	-- every geolect starts from the same base transcription
	local data = {}
	for _, geo in ipairs(geolects) do
		data[geo] = word
	end

	-- apply features to each geolect, in the order given by `features`;
	-- features without an implementation are skipped
	for _, feature in ipairs(features) do
		if feature_functions[feature] then
			data = feature_functions[feature].func(data)
		end
	end

	-- stress marks, syllable dots and delimiters
	-- (only existing keys are reassigned, which is safe during pairs)
	for geo, ipa in pairs(data) do
		data[geo] = IPA_separators(ipa)
	end

	return data
end

-- Debug helper; on the debug console use: =p.debug("your_word")
-- Returns one "code: transcription" line per geolect. Lines are sorted by
-- geolect code so that the output is the same on every run (the iteration
-- order of pairs is unspecified).
local function _debug(word)
	local transcriptions = show(word)

	local codes = {}
	for code in pairs(transcriptions) do
		codes[#codes + 1] = code
	end
	table.sort(codes)

	local lines = {}
	for i, code in ipairs(codes) do
		lines[i] = code .. ": " .. transcriptions[code] .. "\n"
	end
	return table.concat(lines)
end

-- Public interface of the module:
--   show(word)  -> table of transcriptions keyed by geolect code
--   debug(word) -> the same data as printable text
local exports = {}
exports.show = show
exports.debug = _debug
return exports