-- Module:hy-pronunciation
--
-- Avy amin'i Wikibolana — Rakibolana malagasy malalaka
--
-- La documentation pour ce module peut être créée à Module:hy-pronunciation/doc

local export = {}

-- Single characters that map to IPA sounds, keyed by pronunciation system
-- ("east" or "west"). The table for the selected system is used as the
-- replacement table of a per-character gsub, so any character without an
-- entry is left unchanged.
--
-- NOTE(review): both tables used to list the key "-" twice (first as " ",
-- then as ""). A table constructor cannot hold two values for one key, and in
-- practice the later entry won, so only the effective mapping ("-" is deleted)
-- is kept here. If a hyphen was meant to act as a word separator instead,
-- change the value to " " -- confirm against the intended transcriptions.
local phonetic_chars_map = {
	-- Eastern Armenian
	east = {
		["ա"]="ɑ", ["բ"]="b", ["գ"]="ɡ", ["դ"]="d", ["ե"]="ɛ", ["զ"]="z",
		["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="t͡s", ["կ"]="k", ["հ"]="h", ["ձ"]="d͡z", ["ղ"]="ʁ",
		["ճ"]="t͡ʃ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
		["չ"]="t͡ʃʰ", ["պ"]="p", ["ջ"]="d͡ʒ", ["ռ"]="r", ["ս"]="s", ["վ"]="v",
		["տ"]="t", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["օ"]="ɔ", ["ֆ"]="f", ["՚"]="", ["-"]=""
	},
	-- Western Armenian
	west = {
		["ա"]="ɑ", ["բ"]="pʰ", ["գ"]="kʰ", ["դ"]="tʰ", ["ե"]="ɛ", ["զ"]="z",
		["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="d͡z", ["կ"]="ɡ", ["հ"]="h", ["ձ"]="t͡sʰ", ["ղ"]="ʁ",
		["ճ"]="d͡ʒ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
		["չ"]="t͡ʃʰ", ["պ"]="b", ["ջ"]="t͡ʃʰ", ["ռ"]="r", ["ս"]="s", ["վ"]="v",
		["տ"]="d", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["օ"]="ɔ", ["ֆ"]="f", ["՚"]="", ["-"]=""
	},
}

-- Multi-character sequences that map to IPA sounds, keyed by pronunciation
-- system. Each entry is a { pattern, replacement } pair; the entries of the
-- selected system are applied in list order, each as one gsub over the word.
local phonetic_2chars_map = {
	east = {
		{ "ու", "u" },
		-- diphthongization: [իե] = [jɛ], [իա] = [jɑ]
		{ "իե", "jɛ" },
		{ "իա", "jɑ" },
	},
	west = {
		-- յու becomes ʏ, keeping the (up to two) characters captured before
		-- it, unless nothing precedes it, the captured text ends in
		-- whitespace or one of աեէիոօ, or the captured text is ու. In those
		-- cases the callback returns nothing and the match is left untouched.
		{
			"(.?.?)յու",
			function(prefix)
				local keep_as_is = prefix == ""
					or mw.ustring.find(prefix, "[%sաեէիոօ]$")
					or prefix == "ու"
				if not keep_as_is then
					return prefix .. "ʏ"
				end
			end,
		},
		{ "ու", "u" },
		{ "էօ", "œ" },
		-- պ, տ, կ are not voiced after ս and շ
		{ "սպ", "sp" },
		{ "ստ", "st" },
		{ "սկ", "sk" },
		{ "շպ", "ʃp" },
		{ "շտ", "ʃt" },
		{ "շկ", "ʃk" },
		-- Western Armenian inserts ə in the causative
		{ "ցնել", "t͡sʰənɛl" },
		-- diphthongization: [իե] = [jɛ], [իա] = [jɑ]
		{ "իե", "jɛ" },
		{ "իա", "jɑ" },
	},
}

-- Ordered { pattern, replacement } rule lists used by export._pronunciation.
-- They operate on the IPA string produced by the letter mapping and are
-- applied strictly in list order, one gsub per entry.

-- assimilation: ppʰ = pʰː; ttʰ = tʰː; kkʰ = kʰː
local aspirate_merge_rules = {
	{ "ppʰ", "pʰː" },
	{ "ttʰ", "tʰː" },
	-- The pattern used to be "kkʰ " (with a trailing space), so it only
	-- matched before a space and swallowed that space.
	{ "kkʰ", "kʰː" },
}

-- devoicing of consonants in some positions (both systems)
local devoicing_rules = {
	{ "bpʰ", "pʰː" },
	{ "dpʰ", "tʰpʰ" },
	{ "ɡpʰ", "kʰpʰ" },
	{ "d͡zpʰ", "t͡sʰpʰ" },
	{ "d͡ʒpʰ", "t͡ʃʰpʰ" },
	{ "vpʰ", "fpʰ" },

	{ "btʰ", "pʰtʰ" },
	{ "dtʰ", "tʰː" },
	{ "ɡtʰ", "kʰtʰ" },
	{ "d͡ztʰ", "t͡sʰtʰ" },
	{ "d͡ʒtʰ", "t͡ʃʰtʰ" },
	{ "vtʰ", "ftʰ" },

	{ "bkʰ", "pʰkʰ" },
	-- NOTE(review): every other "d + aspirate" rule here yields tʰ; the
	-- unaspirated "tkʰ" may be a typo for "tʰkʰ" -- confirm before changing.
	{ "dkʰ", "tkʰ" },
	{ "ɡkʰ", "kʰː" },
	{ "d͡zkʰ", "t͡sʰkʰ" },
	{ "d͡ʒkʰ", "t͡ʃʰkʰ" },
	{ "vkʰ", "fkʰ" },

	{ "bt͡ʃʰ", "pʰt͡ʃʰ" },
	{ "dt͡ʃʰ", "tʰt͡ʃʰ" },
	{ "ɡt͡ʃʰ", "kʰt͡ʃʰ" },
	{ "d͡zt͡ʃʰ", "t͡sʰt͡ʃʰ" },
	{ "d͡ʒt͡ʃʰ", "t͡ʃʰː" },
	{ "vt͡ʃʰ", "ft͡ʃʰ" },

	{ "bt͡sʰ", "pʰt͡sʰ" },
	{ "dt͡sʰ", "tʰt͡sʰ" },
	{ "ɡt͡sʰ", "kʰt͡sʰ" },
	{ "d͡zt͡sʰ", "t͡sʰː" },
	{ "d͡ʒt͡sʰ", "t͡ʃʰt͡sʰ" },
	{ "vt͡sʰ", "ft͡sʰ" },

	{ "zpʰ", "spʰ" },
	{ "ztʰ", "stʰ" },
	{ "zkʰ", "skʰ" },

	{ "ʁt͡s", "χt͡s" },
	{ "ʁt͡ʃ", "χt͡ʃ" },
	{ "ʁp", "χp" },
	{ "ʁt", "χt" },
	{ "ʁk", "χk" },

	{ "vt͡s", "ft͡s" },
	{ "vt͡ʃ", "ft͡ʃ" },
	{ "vp", "fp" },
	{ "vt", "ft" },
	{ "vk", "fk" },
	{ "vs", "fs" },
	{ "vʃ", "fʃ" },
}

-- additional devoicing applied only for the "west" system
local west_devoicing_rules = {
	{ "χd͡z", "χt͡s" },
	{ "χd͡ʒ", "χt͡ʃ" },
	{ "χb", "χp" },
	{ "χd", "χt" },
	{ "χɡ", "χk" },

	{ "t͡ʃʰd͡z", "t͡ʃʰt͡s" },
	{ "t͡sʰd͡z", "t͡sʰt͡s" },
	{ "pʰd͡z", "pʰt͡s" },
	{ "tʰd͡z", "tʰt͡s" },
	{ "kʰd͡z", "kʰt͡s" },

	{ "t͡ʃʰd͡ʒ", "t͡ʃʰt͡ʃ" },
	{ "t͡sʰd͡ʒ", "t͡sʰt͡ʃ" },
	{ "pʰd͡ʒ", "pʰt͡ʃ" },
	{ "tʰd͡ʒ", "tʰt͡ʃ" },
	{ "kʰd͡ʒ", "kʰt͡ʃ" },

	{ "t͡ʃʰb", "t͡ʃʰp" },
	{ "t͡sʰb", "t͡sʰp" },
	{ "pʰb", "pʰp" },
	{ "tʰb", "tʰp" },
	{ "kʰb", "kʰp" },

	{ "t͡ʃʰd", "t͡ʃʰt" },
	{ "t͡sʰd", "t͡sʰt" },
	{ "pʰd", "pʰt" },
	{ "tʰd", "tʰt" },
	{ "kʰd", "kʰt" },

	{ "t͡ʃʰɡ", "t͡ʃʰk" },
	{ "t͡sʰɡ", "t͡sʰk" },
	{ "pʰɡ", "pʰk" },
	{ "tʰɡ", "tʰk" },
	{ "kʰɡ", "kʰk" },
}

-- Applies each { pattern, replacement } pair of `rules` to `text`, in order,
-- and returns the resulting string. The replacement may be a string or a
-- function, exactly as accepted by mw.ustring.gsub.
local function apply_rules(text, rules)
	for _, rule in ipairs(rules) do
		text = mw.ustring.gsub(text, rule[1], rule[2])
	end
	return text
end

-- Stress placement for one whitespace-delimited chunk of the IPA string.
-- Returns the chunk with the IPA stress mark inserted, or nothing for chunks
-- with at most one vowel (so the caller's gsub leaves them unchanged).
--
-- Rules: the stress is on the last syllable not formed by schwa [ə]. When the
-- stressed vowel is marked explicitly with the Armenian stress character <՛>
-- (e.g. մի՛թե), that mark decides instead.
--   1) If a vowel is followed by <՛>, put the stress before the consonants
--      leading up to that vowel and drop the <՛>.
--   2) Otherwise find the last non-schwa vowel [ɑɛɔiuœʏ] and put the stress
--      before the consonants leading up to it.
--   3) Then, if the stress ended up right after a vowel with two or more
--      consonants following, move it so that it sits before the last of them.
local function add_stress(chunk)
	-- Count the vowels; a chunk with fewer than two gets no stress mark.
	local vowel_count = select(2, mw.ustring.gsub(chunk, "[ɑɛəɔiuœʏ]", "%0"))
	if vowel_count <= 1 then
		return
	end

	local explicit_marks
	chunk, explicit_marks = mw.ustring.gsub(chunk, "([^ɑɛɔiuœʏə]*[ɑɛɔiuœʏə])՛", "ˈ%1")
	if explicit_marks == 0 then
		-- last syllable has a non-schwa vowel
		chunk = mw.ustring.gsub(chunk, "([^ɑɛɔiuœʏə]*[ɑɛɔiuœʏ][^ɑɛɔiuœʏə]*)$", "ˈ%1")
		-- last syllable is formed by schwa: stress the syllable before it
		chunk = mw.ustring.gsub(chunk, "([^ɑɛɔiuœʏə]*[ɑɛəɔiuœʏ]?[ɑɛɔiuœʏ][^ɑɛɔiuœʏə]*ə[^ɑɛɔiuœʏə]*)$", "ˈ%1")
	end
	-- Including () in the second and third sets will only work
	-- if () never encloses a vowel.
	chunk = mw.ustring.gsub(chunk, "([ɑɛəɔiuœʏ])ˈ([^ɑɛɔiuœʏə()]+)([^ɑɛɔiuœʏəːˈʰ()])", "%1%2ˈ%3")
	-- Never leave the stress mark right after a tie bar (inside an
	-- affricate): move it in front of the character that carries the tie.
	chunk = mw.ustring.gsub(chunk, "(.)͡ˈ", "ˈ%1͡")
	return chunk
end

--- Converts an Armenian-script word to an IPA transcription.
-- @param word    the text to transcribe; it is lowercased first
-- @param system  "east" or "west", selecting the letter and digraph tables
-- @return the IPA string, with stress marks on chunks of two or more vowels
-- Raises an error ("Invalid system ...") when `system` has no entry in both
-- mapping tables.
function export._pronunciation(word, system)
	local chars_map = phonetic_chars_map[system]
	local sequence_rules = phonetic_2chars_map[system]
	if not (chars_map and sequence_rules) then
		error("Invalid system " .. tostring(system))
	end

	local gsub = mw.ustring.gsub
	local phonetic = mw.ustring.lower(word)

	-- Any character immediately repeated is treated as orthographic
	-- gemination: the second copy is replaced with the length mark ː.
	phonetic = gsub(phonetic, "(.)%1", "%1ː")

	-- Multi-character sequences, while the text is still in Armenian script.
	phonetic = apply_rules(phonetic, sequence_rules)

	-- ե and ո are pronounced as jɛ and vɔ word-initially
	-- (the anchors match only at the very start of the input string),
	phonetic = gsub(phonetic, "^ե", "յէ")
	phonetic = gsub(phonetic, "^ո", "վօ")
	-- except when followed by another վ.
	phonetic = gsub(phonetic, "^վօվ", "օվ")

	-- Letter-by-letter mapping; characters without an entry stay as they are.
	phonetic = gsub(phonetic, '.', chars_map)

	-- assimilation: nasal + velar plosives = velar nasal + velar plosives
	phonetic = gsub(phonetic, "n([ɡk]+)", "ŋ%1")

	phonetic = apply_rules(phonetic, aspirate_merge_rules)

	-- assimilation of n to m
	-- phonetic = gsub(phonetic, "n([bp]+)", "m%1") [uncertain if this is regular]

	-- pseudo-palatalization under the influence of Russian [COLLOQUIAL, NOT STANDARD]
	--phonetic = gsub(phonetic, "tj", "t͡sj")
	--phonetic = gsub(phonetic, "tʰj", "t͡sʰj")
	--phonetic = gsub(phonetic, "dj", "d͡zj")

	-- palatalization in the Eastern Armenian sequence -ությ-, especially in the
	-- suffix -ություն [considered non-standard by strict prescriptivists]
	if system == "east" then
		phonetic = gsub(phonetic, "utʰj", "ut͡sʰj")
	end

	-- trilling of ɾ in some positions [COLLOQUIAL, NOT STANDARD]
	--phonetic = gsub(phonetic, "ɾt", "rt")

	-- devoicing of consonants in some positions
	phonetic = apply_rules(phonetic, devoicing_rules)
	if system == "west" then
		phonetic = apply_rules(phonetic, west_devoicing_rules)
	end

	-- prothetic ə before {s/ʃ/z}{p/t/k/b/d/g} at the start of the string:
	-- shown as optional "(ə)" for east, always inserted for west
	if system == "east" then
		phonetic = gsub(phonetic, "^([sʃz][ptkbdɡ]+)", "(ə)%1")
	elseif system == "west" then
		phonetic = gsub(phonetic, "^([sʃz][ptkbdɡ]+)", "ə%1")
	end

	-- generating the stress, one whitespace-delimited chunk at a time
	phonetic = gsub(phonetic, "%S+", add_stress)

	-- correcting the stress position in some cases
	if system == "east" then
		phonetic = gsub(phonetic, "ut͡sʰˈj", "uˈt͡sʰj")
	end

	return phonetic
end

--- Entry point usable both from wikitext ({{#invoke:}}) and from other modules.
-- @param word    either a frame object (a table) or the word to transcribe
-- @param system  pronunciation system when `word` is not a frame; when a
--                frame is passed it is read from the `system` argument of the
--                invocation or of the parent frame. Defaults to "east".
-- @return the value returned by export._pronunciation(word, system)
-- Raises an error when no word (or an empty word) was supplied.
function export.pronunciation(word, system)
	if type(word) == "table" then
		local frame = word
		local invoke_args = frame.args
		-- getParent() returns nil when the frame has no parent; fall back to
		-- an empty argument list instead of failing on a nil index.
		local parent = frame:getParent()
		local parent_args = parent and parent.args or {}
		word = invoke_args[1] or parent_args[1]
		system = invoke_args.system or parent_args.system
	end
	if not word or (word == "") then
		error("Please put the word as the first positional parameter!")
	end

	-- Same default for template invocations and for direct Lua callers.
	system = system or "east"

	return export._pronunciation(word, system)
end
 
return export