Mòdul:ca-general/proves

De Viccionari

La documentació d'ús d'aquest mòdul es pot crear a Mòdul:ca-general/proves/ús

-- General-purpose functions for Catalan.

local p = {}

-- Lookup tables used to build sort keys for categories in Catalan.

-- Accented letter -> unaccented letter; the middle dot is removed.
local senseDiacritics = {
    ["·"] = "",
    ["à"] = "a",
    ["À"] = "A",
    ["è"] = "e",
    ["È"] = "E",
    ["é"] = "e",
    ["É"] = "E",
    ["í"] = "i",
    ["Í"] = "I",
    ["ï"] = "i",
    ["Ï"] = "I",
    ["ò"] = "o",
    ["Ò"] = "O",
    ["ó"] = "o",
    ["Ó"] = "O",
    ["ú"] = "u",
    ["Ú"] = "U",
    ["ü"] = "u",
    ["Ü"] = "U",
    ["ç"] = "c",
    ["Ç"] = "C",
}

-- Signs: slash, hyphen and full stop are removed; space and apostrophe
-- become "&".
-- Original notes: the blank space sorts before "!"; the underscore sorts
-- after A-Z and MediaWiki ends up converting it to uppercase.
local senseSignes = {
    [" "] = "&",
    ["'"] = "&",
    ["."] = "",
    ["-"] = "",
    ["/"] = "",
}

-- Order e é è simulated with the Unicode order e è é ê.
local ordreAccents = {
    ["ò"] = "ô",
    ["è"] = "ê",
}

-- Builds the category sort key for a Catalan word.
--
-- mot: either a frame-like table (the word is read from mot.args[1]) or a
--      plain string. When it is nil or empty, the current page title is used.
-- Returns the sort key. Full format: "clau1!clau2!clau3!mot", where trailing
-- parts are omitted (or left empty) when they add no information:
--   clau1  primary key: lowercase, without diacritics or signs
--   clau2  secondary key: priority of the diacritics e é è
--   clau3  tertiary key: priority lowercase > uppercase
--   mot    quaternary key: priority without signs > with signs
function p.ordena(mot)
    -- Called from templates via #invoke (frame) or from modules via require (string).
    if type(mot) == "table" then mot = mot.args[1] end
    -- Trim first: once senseSignes has turned spaces into "&" they can no
    -- longer be trimmed, and every key below must be derived from the same text.
    if type(mot) == "string" then
        mot = mw.text.trim(mot)
    end
    if mot == "" or mot == nil then
        mot = mw.title.getCurrentTitle().text
    end

    local clau
    -- gsub returns (string, count); assigning to a single local keeps the string.
    local motSenseSignes = mw.ustring.gsub(mot, ".", senseSignes)
    local motLower = mw.ustring.lower(motSenseSignes)
    local clau1 = mw.ustring.gsub(motLower, ".", senseDiacritics)
    if clau1 == mot then
        -- Already lowercase, with no diacritics and no signs: the word is its own key.
        return clau1
    end

    local clau2 = mw.ustring.gsub(motLower, ".", ordreAccents)
    local clau3 = mw.ustring.gsub(motSenseSignes, ".", senseDiacritics)

    if clau1 == motLower then
        -- No diacritics: the secondary key stays empty.
        clau = clau1 .. "!"
    else
        clau = clau1 .. "!" .. clau2
    end
    if clau3 ~= clau1 then
        -- Contains uppercase letters.
        clau = clau .. "!" .. clau3
    elseif motSenseSignes ~= mot then
        -- Empty tertiary slot so the quaternary key lands in the right position.
        clau = clau .. "!"
    end
    if motSenseSignes ~= mot then
        -- Contains signs: append the word itself.
        clau = clau .."!" .. mot
    end
    return clau
end

-- Reverses a string one UTF-8 character at a time. string.reverse works on
-- bytes and would split multi-byte characters into invalid sequences.
local function reverseChars(s)
	local chars = {}
	for ch in mw.ustring.gmatch(s, ".") do
		chars[#chars + 1] = ch
	end
	local reversed = {}
	for i = #chars, 1, -1 do
		reversed[#reversed + 1] = chars[i]
	end
	return table.concat(reversed)
end

-- Categorisation by orthographic features of the current page title:
-- inverse dictionary and geminate l (l·l).
-- Palindrome categorisation was moved to [[Mòdul:palíndroms]].
--
-- frame: unused; the function reads the current title instead.
-- Returns the category wikitext, or "" when the page is outside the main
-- namespace, is an affix (leading or trailing hyphen), or is a phrase or
-- abbreviation (contains a space, apostrophe or full stop).
function p.cat_ortografia(frame)
	local title = mw.title.getCurrentTitle()
	local pagename = title.text

	if title.nsText ~= "" then
		return ""
	end
	if pagename:find("^%-") or pagename:find("%-$") then -- affixes
		return ""
	end
	if pagename:find("[ '.]") then -- phrases and abbreviations
		return ""
	end

	-- Normalised word: signs removed, lowercase, no diacritics.
	-- The extra parentheses drop gsub's second return value (the match count).
	-- No separate space removal is needed: senseSignes has already replaced spaces.
	local motNet = mw.ustring.lower((mw.ustring.gsub(pagename, ".", senseSignes)))
	motNet = mw.ustring.gsub(motNet, ".", senseDiacritics)
	local motInvers = reverseChars(motNet)

	local ret = "[[Categoria:Diccionari invers en català|" .. motInvers .. "]]"
	if mw.ustring.find(pagename, "l·l") then
		ret = ret .. "[[Categoria:Mots en català amb eles geminades|" .. motNet .. "]]"
	end
	return ret
end

--[[
Syllabification

    internal marking: vowels 0, onsets 1, codas 2
    syllable: ·(1*)0(2*)·

p.sil(mot)
    mot: either a frame-like table (the word is read from mot.args[1]) or a
         plain string. When it is nil or empty, the current page title is used.
    Returns the word with "·" inserted at each syllable boundary.

A lowercase working copy of the word is rewritten, rule by rule, into the
0/1/2 marking. Every rule replaces text with the same number of characters,
so position i of the marking still corresponds to position i of the word.
The order of the rules matters: later rules only see what earlier ones left
unmarked.
]]
function p.sil(mot)
	if type(mot) == "table" then mot = mot.args[1] end -- from templates via invoke or from modules via require
	if mot == "" or mot == nil then
		mot = mw.title.getCurrentTitle().text
	end
	local sil = mw.ustring.lower(mot)
	
	-- Prefixes that break the general rules.
	-- The initial is taken from the original-case word, so these rules only
	-- fire when the word starts with a lowercase letter.
	local initial = string.sub(mot, 1, 1)
	if initial == "a" then
		sil = string.gsub(sil, "^anae", "0200") -- an-
		sil = string.gsub(sil, "^anafro", "020110")
		sil = mw.ustring.gsub(sil, "^an[aà]l[fg]", "02021")
		sil = mw.ustring.gsub(sil, "^an[aà]r([cq])", "0202%1")
		sil = mw.ustring.gsub(sil, "^anè", "020")
		sil = string.gsub(sil, "^ane([nprs])", "020%1")
		sil = mw.ustring.gsub(sil, "^an[uú]r", "020r")
		sil = string.gsub(sil, "^autoi([mn])", "02100%1") -- auto-
	elseif initial == "b" then
		sil = mw.ustring.gsub(sil, "^bena(%l+è)", "1010%1")
		sil = string.gsub(sil, "^bena(%l)", "1020%1") -- ben-
		sil = string.gsub(sil, "^bene([ns]%l)", "1020%1")
		sil = mw.ustring.gsub(sil, "^bes[aà]v", "10201") -- bes-
		sil = string.gsub(sil, "^beson", "10202")
		sil = string.gsub(sil, "^bisan", "10202") -- bis-
	elseif initial == "c" then
		sil = string.gsub(sil, "^coin", "1002") -- co-
		sil = string.gsub(sil, "^con[ou][nr]", "10202") -- con-
		sil = string.gsub(sil, "^contrai", "1021100") -- contra-
	elseif initial == "d" then
		sil = mw.ustring.gsub(sil, "^des([aeiouú])", "102%1") -- des-, with exceptions still pending
	elseif initial == "e" then
		sil = string.gsub(sil, "^enanti", "010210") -- enantio-
		sil = mw.ustring.gsub(sil, "^en[oò]([flt])", "010%1") -- eno-
		sil = string.gsub(sil, "^enorm", "01021") -- 
		sil = mw.ustring.gsub(sil, "^en[aoò]", "020") -- en-, except the cases above
		sil = string.gsub(sil, "^exa([bclnrs])", "020%1") -- ex-
		sil = string.gsub(sil, "^exo([rs][^cdrpqt])", "020%1")
	elseif initial == "h" then
		sil = string.gsub(sil, "^hiper[ae]", "101020") -- hiper-
	elseif initial == "i" then
		sil = string.gsub(sil, "^inani[ct]", "010101")
		sil = string.gsub(sil, "^inefa", "01010")
		sil = mw.ustring.gsub(sil, "^in[eè]p", "0102")
		sil = mw.ustring.gsub(sil, "^in[eè]r([^ru])", "0102%1")
		sil = string.gsub(sil, "^ino[cs][ei]", "01010")
		sil = mw.ustring.gsub(sil, "^in[aeèoò]", "020") -- in-, except the cases above
		sil = string.gsub(sil, "^ini([gmn])", "020%1")
		sil = mw.ustring.gsub(sil, "^in[uú]([rst])", "020%1")
		sil = string.gsub(sil, "^infra[iu]", "021100") -- infra-
		sil = string.gsub(sil, "^inter[ao]([^p])", "021020%1") -- inter
		sil = string.gsub(sil, "^interest", "02102021")
		sil = string.gsub(sil, "^intra[iu]", "021100") -- intra-
	elseif initial == "m" then
		sil = string.gsub(sil, "^m[ai]cro[iu]", "101100") -- macro-, micro-
	elseif initial == "n" then
		sil = string.gsub(sil, "^nosal", "10202")
	elseif initial == "p" then
		sil = string.gsub(sil, "^pana([frt][rate][^a])", "1020%1") -- pan-
		sil = string.gsub(sil, "^panamer", "1020101")
		sil = string.gsub(sil, "^panisl", "102021")
		sil = mw.ustring.gsub(sil, "^panòpt", "102021")
		sil = string.gsub(sil, "^posta[bcl]([^$])", "102202%1") -- post-
		sil = string.gsub(sil, "^postes([^$])", "102202%1")
		sil = string.gsub(sil, "^post[io][mp]", "102202")
		sil = mw.ustring.gsub(sil, "^post[^aàeèéioòóu]", "10221")
		sil = string.gsub(sil, "^pr[eo]i([^x])", "1100%1") -- pre-, pro-
	elseif initial == "r" then
		sil = string.gsub(sil, "^rein[ae]", "10210")
		sil = string.gsub(sil, "^rei([^aegx])", "100%1") -- re-
		sil = string.gsub(sil, "^reun", "1001")
	elseif initial == "s" then
		sil = string.gsub(sil, "^sots[ai]", "10220") -- sots-
		sil = mw.ustring.gsub(sil, "^sub([aàíour])", "102%1") -- sub-
		sil = mw.ustring.gsub(sil, "^sub[eè]([^r])", "1020%1")
		sil = string.gsub(sil, "^subl[iu][nt]", "102101")
	elseif initial == "t" then
		sil = string.gsub(sil, "^trans[aeou]", "110220") -- trans-
	elseif initial == "u" then
		sil = string.gsub(sil, "^ultra[iu]", "021100") -- ultra-
	elseif initial == "v" then
		sil = string.gsub(sil, "^vosal", "10202")
	end
	-- Rising diphthongs
	sil = mw.ustring.gsub(sil, "[qg][uü][aàeèéiíïoòóuúü]", "110")
	sil = mw.ustring.gsub(sil, "[aàeèéiíïoòóuúü][iu][aàeèéiíïoòóuúü]", "010")
	sil = mw.ustring.gsub(sil, "^i[oò]ni(.)", "0010%1") -- exception for derivatives of "ió"
	sil = mw.ustring.gsub(sil, "^(h?)[iu][aàeèéioòóu]", "%110")
	-- Suffixes and endings written without a diaeresis
	sil = string.gsub(sil, "[aeou]ir$", "002") -- infinitives in -ir
	sil = string.gsub(sil, "[aeou]int$", "0022") -- gerunds
	sil = mw.ustring.gsub(sil, "[aeou]ir[éà]$", "0010") -- future
	sil = mw.ustring.gsub(sil, "[aeou]iràs$", "00102") -- future
	sil = string.gsub(sil, "[aeou]ire[mu]$", "00102") -- future
	sil = string.gsub(sil, "[aeou]iran$", "00102") -- future
	sil = string.gsub(sil, "[aeou]iria$", "00100") -- conditional
	sil = string.gsub(sil, "[aeou]irie[sn]$", "001002") -- conditional
	sil = string.gsub(sil, "[0iu]um(s?)$", "002%1") -- Latinisms
	sil = string.gsub(sil, "[0aeiou]isme(s?)$", "00210%1") -- -isme
	sil = string.gsub(sil, "[0aeiou]ist([ae]s?)$", "0021%1") -- -ista
	-- Falling diphthongs
	sil = mw.ustring.gsub(sil, "[0aàeèéioòóu][u]", "02") -- includes triphthongs: rising 10 + falling 2
	sil = mw.ustring.gsub(sil, "[0aàeèéoòóuúü][i]", "02")
	sil = string.gsub(sil, "ii$", "02") -- only at the end of the word, not with a prefix
	-- Vowel nuclei
	sil = mw.ustring.gsub(sil, "[aàeèéiíïoòóuúü]", "0")
	-- Final codas
	sil = string.gsub(sil, "%l$", "2")
	sil = string.gsub(sil, "%l2$", "22")
	sil = string.gsub(sil, "%l22$", "222")
	-- Onsets
	sil = string.gsub(sil, "^%l", "1")
	sil = string.gsub(sil, "^1%l", "11")
	sil = string.gsub(sil, "^11%l", "111")
	sil = string.gsub(sil, "ll0", "110")
	sil = string.gsub(sil, "ny0", "110")
	sil = string.gsub(sil, "[ptcfbdg]r", "11")
	sil = string.gsub(sil, "[pcfbg]l", "11")
	sil = string.gsub(sil, "%l0", "10")
	sil = mw.ustring.gsub(sil, "[çñ]0", "10") -- %l (all ASCII lowercase letters) does not include ç, ñ
	-- Interior codas
	sil = string.gsub(sil, "[ps][%l1]", "21")
	sil = string.gsub(sil, "%l([12])", "2%1")
	
	-- Syllable separation: a boundary falls before a vowel (0) or an onset (1)
	-- whenever the previous mark is a vowel (0) or a coda (2).
	local anterior = ""
	local mot_sep = {}
	for i = 1, mw.ustring.len(mot) do
		-- Declared local: the loop must not create a global variable.
		local actual = mw.ustring.sub(sil, i, i)
		if (actual == "0" or actual == "1") and (anterior == "0" or anterior == "2") then
			table.insert(mot_sep, "·")
		end
		-- Emit the character from the original word so its case is preserved.
		table.insert(mot_sep, mw.ustring.sub(mot, i, i))
		anterior = actual
	end
	return table.concat(mot_sep)
end

-- Template entry point for syllabification.
--
-- frame: the #invoke frame; the word is the first argument of the parent
--        (template) frame, falling back to the current page title.
-- Returns the syllabified word followed by the syllable count in
-- parentheses, or nothing when the word contains several space-separated parts.
function p.sil_template(frame)
	local word = frame:getParent().args[1]
	-- Treat an empty argument like a missing one, as p.sil does; otherwise the
	-- checks below would run on "" while p.sil silently falls back to the
	-- title, skipping both the multi-word guard and the hyphen replacement.
	if word == nil or word == "" then
		word = mw.title.getCurrentTitle().text
	end
	if mw.ustring.find(word, "[^ ]+ [^ ]+") then
		return
	end
	-- Hyphens become explicit syllable boundaries ("-" escaped: it is a
	-- pattern quantifier in Lua).
	word = string.gsub(word, "%-", "·")
	local sil = p.sil(word)
	local num_sil = mw.text.split(sil, '·')
	return sil .. ' (' .. #num_sil .. ')'
end

-- Export the module table with its public functions.
return p