Module:it-pronunciation

La documentation pour ce module peut être créée à Module:it-pronunciation/doc
local export = {}

local m_table = require("Module:table")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len

local lang = require("Module:languages").getByCode("it")

local AC = u(0x301)
local GR = u(0x300)
local CFLEX = u(0x302)
local DOTOVER = u(0x0307) -- dot over =  ̇ = signal unstressed word
local DOTUNDER = u(0x0323) -- dot under =  ̣ = unstressed vowel with quality marker
local LINEUNDER = u(0x0331) -- line under =  ̱ = secondary-stressed vowel with quality marker
local DIA = u(0x0308) -- diaeresis = ̈
local SYLDIV = u(0xFFF0) -- used to represent a user-specific syllable divider (.) so we won't change it
local TEMP_Z = u(0xFFF1)
local TEMP_S = u(0xFFF2)
local TEMP_H = u(0xFFF3)
local TEMP_X = u(0xFFF4)
local stress = "ˈˌ"
local stress_c = "[" .. stress .. "]"
local quality = AC .. GR
local quality_c = "[" .. quality .. "]"
local accent = stress .. quality .. DOTOVER .. DOTUNDER .. LINEUNDER
local accent_c = "[" .. accent .. "]"
local glide = "jw"
local liquid = "lr"
local W = "[" .. glide .. "]"
local vowel = "aeɛioɔuEOyø"
local V = "[" .. vowel .. "]"
local VW = "[" .. vowel .. "jw]"
local NV = "[^" .. vowel .. "]"
local charsep_not_tie = accent .. "." .. SYLDIV
local charsep_not_tie_c = "[" .. charsep_not_tie .. "]"
local charsep = charsep_not_tie .. "‿⁀"
local charsep_c = "[" .. charsep .. "]"
local wordsep_not_tie = charsep_not_tie .. " #"
local wordsep = charsep .. " #"
local wordsep_c = "[" .. wordsep .. "]"
local C = "[^" .. vowel .. wordsep .. "_]" -- consonant
local C_OR_EOW_NOT_GLIDE_LIQUID = "[^" .. vowel .. charsep .. " _" .. glide .. liquid .. "]" -- consonant not lrjw, or end of word
local C_OR_TIE = "[^" .. vowel .. wordsep_not_tie .. "_]" -- consonant or tie (‿)
local front = "eɛij"
local front_c = "[" .. front .. "]"
local FRONTED = u(0x031F)
local voiced_C_c = "[bdglmnrvʣŋʎɲ]"

local full_affricates = { ["ʦ"] = "t͡s", ["ʣ"] = "d͡z", ["ʧ"] = "t͡ʃ", ["ʤ"] = "d͡ʒ" }

local recognized_suffixes = {
	-- -(m)ente, -(m)ento
	{"ment([eo])", "mént%1"}, -- must precede -ente/o below
	{"ent([eo])", "ènt%1"}, -- must follow -mente/o above
	-- verbs
	{"izzare", "iddzàre"}, -- must precede -are below
	{"izzarsi", "iddzàrsi"}, -- must precede -arsi below
	{"([ai])re", "%1" .. GR .. "re"}, -- must follow -izzare above
	{"([ai])rsi", "%1" .. GR .. "rsi"}, -- must follow -izzarsi above
	-- nouns
	{"izzatore", "iddzatóre"}, -- must precede -tore below
	{"([st])ore", "%1óre"}, -- must follow -izzatore above
	{"izzatrice", "iddzatrìce"}, -- must precede -trice below
	{"trice", "trìce"}, -- must follow -izzatrice above
	{"izzazione", "iddzatsióne"}, -- must precede -zione below
	{"zione", "tsióne"}, -- must precede -one below and follow -izzazione above
	{"one", "óne"}, -- must follow -zione above
	{"acchio", "àcchio"},
	{"acci([ao])", "àcci%1"},
	{"([aiu])ggine", "%1" .. GR .. "ggine"},
	{"aggio", "àggio"},
	{"([ai])gli([ao])", "%1" .. GR .. "gli%2"},
	{"ai([ao])", "ài%1"},
	{"([ae])nza", "%1" .. GR .. "ntsa"},
	{"ario", "àrio"},
	{"([st])orio", "%1òrio"},
	{"astr([ao])", "àstr%1"},
	{"ell([ao])", "èll%1"},
	{"etta", "étta"},
	-- do not include -etto, both ètto and étto are common
	{"ezza", "éttsa"},
	{"ficio", "fìcio"},
	{"ier([ao])", "ièr%1"},
	{"ifero", "ìfero"},
	{"ismo", "ìsmo"},
	{"ista", "ìsta"},
	{"izi([ao])", "ìtsi%1"},
	{"logia", "logìa"},
	-- do not include -otto, both òtto and ótto are common
	{"tudine", "tùdine"},
	{"ura", "ùra"},
	{"([^aeo])uro", "%1ùro"},
	-- adjectives
	{"izzante", "iddzànte"}, -- must precede -ante below
	{"ante", "ànte"}, -- must follow -izzante above
	{"izzando", "iddzàndo"}, -- must precede -ando below
	{"([ae])ndo", "%1" .. GR .. "ndo"}, -- must follow -izzando above
	{"([ai])bile", "%1" .. GR .. "bile"},
	{"ale", "àle"},
	{"([aeiou])nico", "%1" .. GR .. "nico"},
	{"([ai])stic([ao])", "%1" .. GR .. "stic%2"},
	-- exceptions to the following: àbato, àcato, acròbata, àgata, apòstata, àstato, cìato, fégato, omeòpata,
	-- sàb(b)ato, others?
	{"at([ao])", "àt%1"},
	{"([ae])tic([ao])", "%1" .. GR .. "tic%2"},
	{"ense", "ènse"},
	{"esc([ao])", "ésc%1"},
	{"evole", "évole"},
	-- FIXME: Systematic exceptions to the following in 3rd plural present tense verb forms
	{"ian([ao])", "iàn%1"},
	{"iv([ao])", "ìv%1"},
	{"oide", "òide"},
	{"oso", "óso"},
}

local unstressed_words = m_table.listToSet {
	"il", "lo", "la", "i", "gli", "le", -- definite articles
	"un", -- indefinite articles
	"mi", "ti", "si", "ci", "vi", "li", -- object pronouns
	"me", "te", "se", "ce", "ve", "ne", -- conjunctive object pronouns
	"e", "ed", "o", "od", -- conjunctions
	"ho", "hai", "ha", -- forms of [[avere]]
	"chi", "che", "non", -- misc particles
	"di", "del", "dei", -- prepositions
	"a", "ad", "al", "ai",
	"da", "dal", "dai",
	"in", "nel", "nei",
	"con", "col", "coi",
	"su", "sul", "sui",
	"per", "pei",
	"tra", "fra",
}

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

-- version of rsubn() that returns a 2nd argument boolean indicating whether
-- a substitution was made.
local function rsubb(term, foo, bar)
	local retval, nsubs = rsubn(term, foo, bar)
	return retval, nsubs > 0
end

-- apply rsub() repeatedly until no change
local function rsub_repeatedly(term, foo, bar)
	while true do
		local new_term = rsub(term, foo, bar)
		if new_term == term then
			return term
		end
		term = new_term
	end
end

function export.to_phonemic(text, pagename)
	local abbrev_text
	if rfind(text, "^%^[àéèìóòù]$") then
		if pagename:find("[ %-]") then
			error("With abbreviated vowel spec " .. text .. ", the page name should be a single word: " .. text)
		end
		abbrev_text = mw.ustring.toNFD(text)
		text = pagename
	end
	local origtext = text
	text = ulower(text)

	-- Decompose combining characters: for instance, è → e + ◌̀
	text = mw.ustring.toNFD(text)

	-- convert commas and en/en dashes to IPA foot boundaries
	text = rsub_repeatedly(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub_repeatedly(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")
	text = rsub(text, "[!?]", "") -- eliminate remaining punctuation

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = canon_spaces(text)

	local origwords = rsplit(text, "[ %-]+")

	text = rsub(text, CFLEX, "") -- eliminate circumflex over î, etc.
	text = rsub(text, "y", "i")
	-- French/German vowels
	text = rsub(text, "u" .. DIA, "y")
	text = rsub(text, "o" .. DIA, "ø")
	text = rsub_repeatedly(text, "([^ ])'([^ ])", "%1‿%2") -- apostrophe between letters is a tie
	text = rsub(text, "(" .. C .. ")'$", "%1‿") -- final apostrophe after a consonant is a tie, e.g. [[anch']]
	text = rsub(text, "(" .. C .. ")' ", "%1‿ ") -- final apostrophe in non-utterance-final word is a tie
	text = rsub(text, "'", "") -- other apostrophes just get removed, e.g. [['ndragheta]], [[ca']].
	 -- For now, use a special marker of syntactic gemination at beginning of word; later we will
	 -- convert to ‿ and remove the space.
	text = rsub(text, "%*([ %-])(" .. C .. ")", "%1⁀%2")
	if rfind(text, "%*[ %-]") then
		error("* for syntactic gemination can only be used when the next word begins with a consonant: " .. origtext)
	end
	text = rsub(text, "([" .. DOTUNDER .. LINEUNDER .. "])(" .. quality_c .. ")", "%2%1") -- acute/grave first
	text = rsub(text, "([aiu])" .. AC, "%1" .. GR) -- áíú -> àìù

	local words = require("Module:string utilities").capturing_split(text, "([ %-]+)")
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			local function err(msg)
				error(msg .. ": " .. origwords[(i + 1) / 2])
			end
			local is_prefix =
			    -- utterance-final followed by a hyphen, or
				i == #words - 2 and words[i+1] == "-" and words[i+2] == "" or
			    -- non-utterance-final followed by a hyphen
				i <= #words - 2 and words[i+1] == "- "
			-- First apply abbrev spec e.g. (à) or (ó) if given.
			if abbrev_text then
				local vowel_count = ulen(rsub(word, NV, ""))
				local abbrev_sub = abbrev_text:gsub("%^", "")
				local abbrev_vowel = usub(abbrev_sub, 1, 1)
				if vowel_count == 0 then
					err("Abbreviated spec " .. abbrev_text .. " can't be used with nonsyllabic word")
				elseif vowel_count == 1 then
					local before, vow, after = rmatch(word, "^(.*)(" .. V .. ")(" .. NV .. "*)$")
					if not before then
						error("Internal error: Couldn't match monosyllabic word: " .. word)
					end
					if abbrev_vowel ~= vow then
						err("Abbreviated spec " .. abbrev_text .. " doesn't match vowel " .. ulower(vow))
					end
					word = before .. abbrev_sub .. after
				else
					local before, penultimate, after = rmatch(word,
						"^(.-)(" .. V .. ")(" .. NV .. "*" .. V .. NV .. "*)$")
					if not before then
						error("Internal error: Couldn't match multisyllabic word: " .. word)
					end
					local before2, antepenultimate, after2 = rmatch(before,
						"^(.-)(" .. V .. ")(" .. NV .. "*)$")
					if abbrev_vowel ~= penultimate and abbrev_vowel ~= antepenultimate then
						err("Abbreviated spec ".. abbrev_text .. " doesn't match penultimate vowel " ..
							ulower(penultimate) .. (antepenultimate and " or antepenultimate vowel " ..
								ulower(antepenultimate) or ""))
					end
					if penultimate == antepenultimate then
						err("Can't use abbreviated spec " .. abbrev_text .. " here because penultimate and " ..
							"antepenultimate are the same")
					end
					if abbrev_vowel == antepenultimate then
						word = before2 .. abbrev_sub .. after2 .. penultimate .. after
					elseif abbrev_vowel == penultimate then
						word = before .. abbrev_sub .. after
					else
						error("Internal error: abbrev_vowel from abbrev_text " .. abbrev_text ..
							" didn't match any vowel or glide: " .. origtext)
					end
				end
			end

			if not is_prefix then
				if not rfind(word, quality_c) then
					-- Apply suffix respellings.
					for _, suffix_pair in ipairs(recognized_suffixes) do
						local orig, respelling = unpack(suffix_pair)
						local replaced
						word, replaced = rsubb(word, orig .. "$", respelling)
						if replaced then
							-- Decompose again because suffix replacements may have accented chars.
							word = mw.ustring.toNFD(word)
							break
						end
					end
				end

				-- Make known unstressed words without stress marks unstressed.
				local bare_word = rsub(word, "⁀", "") -- remove mark of syntactic gemination
				if unstressed_words[bare_word] then
					-- add DOTOVER to the first vowel for cases like [[dei]], [[sui]]
					word = rsub(word, "^(.-" .. V .. accent_c .. "*)", "%1" .. DOTOVER)
				end
			end

			-- Words marked with an acute or grave (quality marker) not followed by an indicator of secondary stress
			-- or non-stress get primary stress.
			word = rsub(word, "(" .. quality_c .. ")([^" .. DOTUNDER .. LINEUNDER .. "])", "%1ˈ%2")
			word = rsub(word, "(" .. quality_c .. ")$", "%1ˈ")
			-- Eliminate quality marker on a/i/u/y/ø, which now serves no purpose.
			word = rsub(word, "([aiuyø])" .. quality_c, "%1")
			-- LINEUNDER means secondary stress.
			word = rsub(word, LINEUNDER, "ˌ")

			-- Make prefixes unstressed. Primary stress markers become secondary.
			if is_prefix then
				word = rsub(word, "ˈ", "ˌ")
				-- add DOTOVER to the first vowel for cases like [[dei]], [[sui]]
				word = rsub(word, "^(.-" .. V .. accent_c .. "*)", "%1" .. DOTOVER)
			end
			words[i] = word
		end
	end
	text = table.concat(words, "")

	-- Convert hyphens to spaces, to handle [[Austria-Hungria]], [[franco-italiano]], etc.
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- Convert e/o unmarked for quality to E/O, and those marked for quality to e/o/ɛ/ɔ.
	local function convert_e_o(txt)
		return rsub(rsub(txt, "[eo]", {["e"] = "E", ["o"] = "O"}), "[EO]" .. quality_c, {
			["E" .. AC] = "e",
			["O" .. AC] = "o",
			["E" .. GR] = "ɛ",
			["O" .. GR] = "ɔ",
		})
	end

	text = convert_e_o(text)
	local words = rsplit(text, " ")

	for i, word in ipairs(words) do
		local function err(msg)
			error(msg .. ": " .. origwords[i])
		end
		-- Transcriptions must contain a primary or second stress indicator or must explicitly be
		-- marked as unstressed, and an e or o with primary stress must be marked for quality.
		if not rfind(word, "[ˈˌ" .. DOTOVER .. "]") then
			local vowel_count = ulen(rsub(word, NV, ""))
			if vowel_count > 2 then
				err("With more than two vowels and an unrecogized suffix, stress must be explicitly given")
			else
				local before, vow, after = rmatch(word, "^(.-)(" .. V .. ")(.*)$")
				if before then
					if vow == "E" or vow == "O" then
						err("When stressed vowel is e or o, it must be marked é/è or ó/ò to indicate quality")
					end
					word = before .. vow .. "ˈ" .. after
				end
			end
		end
		words[i] = word
	end

	text = table.concat(words, " ")
	-- Eliminate DOTOVER/DOTUNDER, which have served their purpose of preventing stress.
	text = rsub(text, "[" .. DOTOVER .. DOTUNDER .. "]", "")
	-- All remaining E/O are in unstressed syllables and become e/o.
	text = ulower(text)

	-- Random substitutions.
	text = rsub(text, "%[x%]", TEMP_X) -- [x] means /x/
	text = rsub(text, "^ex(" .. V .. ")", "eg[z]%1")
	text = text:gsub("x", "ks"):gsub("ck", "k"):gsub("sh", "ʃ")
	text = rsub(text, "%[z%]", TEMP_Z) -- [z] means /z/
	text = rsub(text, "%[s%]", TEMP_S) -- [z] means /s/
	text = rsub(text, "%[h%]", TEMP_H) -- [h] means /h/
	text = rsub(text, "%[tʃ%]", "ʧ")
	text = rsub(text, "%[dʒ%]", "ʤ")

	-- ci, gi + vowel
	-- Do ci, gi + e, é, è sometimes contain /j/?
	text = rsub(text,
		"([cg])([cg]?)i(" .. V .. ")", function(c, double, v)
			local out_cons
			if c == "c" then
				out_cons = "ʧ"
			else
				out_cons = "ʤ"
			end

			if double ~= "" then
				if double ~= c then
					error("Invalid sequence " .. c .. double .. ".")
				end

				out_cons = out_cons .. out_cons
			end

			return out_cons .. v
		end)

	-- Handle gl and gn.
	text = rsub(text, "gn", "ɲ")
	text = rsub(text, "gli(" .. V .. ")", "ʎ%1")
	text = rsub(text, "gl(‿?i)", "ʎ%1")

	-- Handle other cases of c, g.
	text = rsub(text, "([cg])([cg]?)(h?)(" .. charsep_c .. "*.)", function(first, double, h, after)
		-- Don't allow the combinations cg, gc. Or do something else?
		if double ~= "" and double ~= first then
			error("Invalid sequence " .. first .. double .. ".")
		end

		-- c, g is soft before e, i.
		local cons
		if rfind(after, front_c) and not rfind(h, "h") then
			if first == "c" then
				cons = "ʧ"
			else
				cons = "ʤ"
			end
		else
			if first == "c" then
				cons = "k"
			else
				cons = "g"
			end
		end

		if double ~= "" then
			cons = cons .. cons
		end

		return cons .. after
	end)

	-- ⟨qu⟩ represents /kw/.
	text = text:gsub("qu", "kw")
	-- ⟨gu⟩ (unstressed) + vowel represents /gw/.
	text = text:gsub("gu(" .. V .. ")", "gw%1")
	text = rsub(text, "q", "k") -- [[soqquadro]], [[qatariota]], etc.

	-- Assimilate n before labial, including across word boundaries; DiPI marks pronunciations like
	-- /ʤanˈpaolo/ for [[Gian Paolo]] as wrong. To prevent this, use _ or h between n and following labial.
	text = rsub(text, "n(" .. wordsep_c .. "*[mpb])", "m%1")

	-- Unaccented u or i following vowel (with or without accent) is a semivowel. (But 'iu' should be
	-- interpreted as /ju/ not /iw/.) By preceding the conversion of glides before vowels, this works
	-- correctly in the common sequence 'aiuo' e.g. [[guerraiuola]], [[acquaiuolo]]. Note that
	-- ci, gi + vowel, gli, qu must be dealt with beforehand.
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)([iu])([^" .. accent .. "])", function(v, gl, acc)
		if v == "i" and gl == "u" then
			return v .. gl .. acc
		else
			return v .. (gl == "i" and "j" or "w") .. acc
		end
	end)

	-- Unaccented u or i before another vowel is a glide. Do it repeatedly to handle oriuolo /orjwɔlo/.
	text = rsub_repeatedly(text, "([iu])(" .. V .. ")", function(gl, v)
		return (gl == "i" and "j" or "w") .. v
	end)

	-- sc before e, i is /ʃ/, doubled after a vowel.
	text = text:gsub("sʧ", "ʃ")

	text = rsub(text, "ddz", "ʣʣ")
	text = rsub(text, "dz", "ʣ")
	text = rsub(text, "tts", "ʦʦ")
	text = rsub(text, "ts", "ʦ")
	if rfind(text, "z") then
		error("z must be respelled (d)dz or (t)ts: " .. origtext)
	end

	-- Single ⟨s⟩ between vowels is /z/.
	text = rsub_repeatedly(text, "(" .. VW .. stress_c .. "?" .. charsep_c .. "*)s(" .. charsep_c .. "*" .. VW .. ")", "%1z%2")

	-- ⟨s⟩ immediately before a voiced consonant is always /z/
	text = rsub(text, "s(" .. charsep_c .. "*" .. voiced_C_c .. ")", "z%1")

	text = rsub(text, TEMP_Z, "z")
	text = rsub(text, TEMP_S, "s")
	text = rsub(text, TEMP_X, "x")

	-- Double consonant followed by end of word (e.g. [[stress]], [[staff]], [[jazz]]), or followed by a consonant
	-- other than a glide or liquid (e.g. [[pullman]]), should be reduced to single. Should not affect double
	-- consonants between vowels or before glides (e.g. [[occhio]], [[acqua]]) or liquids ([[pubblico]], [[febbraio]]),
	-- or words before a tie ([[mezz']], [[tutt']]).
	text = rsub_repeatedly(text, "(" .. C .. ")%1(" .. charsep_not_tie_c .. "*" .. C_OR_EOW_NOT_GLIDE_LIQUID .. ")", "%1%2")

	-- Between vowels (including glides), /ʃ ʎ ɲ t͡s d͡z/ are doubled (unless already doubled).
	-- Not simply after a vowel; 'z' is not doubled in e.g. [[azteco]].
	text = rsub_repeatedly(text, "(" .. VW .. stress_c .. "?" .. charsep_c .. "*)([ʦʣʃʎɲ])(" .. charsep_c .. "*" .. VW .. ")",
		"%1%2%2%3")

	-- Change user-specified . into SYLDIV so we don't shuffle it around when dividing into syllables.
	text = rsub(text, "%.", SYLDIV)

	-- Divide into syllables.
	-- First remove 'h' and '_', which have served their purpose of preventing context-dependent changes.
	-- They should not interfere with syllabification.
	text = rsub(text, "[h_]", "")
	-- Also now convert ⁀ into a copy of the following consonant with the preceding space converted to ⁀
	-- (which we will eventually convert to a tie symbol ‿, but for awhile we need to distinguish the two
	-- because automatic syllabic gemination in final-stress words happens only in multisyllabic words,
	-- and we don't want it to happen in monosyllabic words joined to a previous word by ⁀). We want to do
	-- this after all consonants have been converted to IPA (so the correct consonant is geminated)
	-- but before syllabification, since e.g. 'va* bène' should be treated as a single word 'va⁀b.bɛne' for
	-- syllabification.
	text = rsub(text, "# #⁀(‿?)(.)", "⁀%2%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*[‿⁀]?)(" .. C .. "[‿⁀]?" .. W .. "?[‿⁀]?" .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*[‿⁀]?" .. C .. "[‿⁀]?)(" .. C .. "[‿⁀]?" .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*[‿⁀]?" .. C_OR_TIE .. "+)(" .. C .. "[‿⁀]?" .. C .. "[‿⁀]?" .. V .. ")", "%1.%2")
	text = rsub(text, "([pbktdg][‿⁀]?)%.([lr])", ".%1%2")
	text = rsub(text, "([kg][‿⁀]?)%.w", ".%1w")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*[‿⁀]?)(" .. V .. ")", "%1.%2")

	-- User-specified syllable divider should now be treated like regular one.
	text = rsub(text, SYLDIV, ".")
	text = rsub(text, TEMP_H, "h")

	local last_word_self_gemination = rfind(text, "[ʦʣʃʎɲ]" .. stress_c .."*##$") and not
		-- In case the user used t͡ʃ explicitly
		rfind(text, "t͡ʃ" .. stress_c .."*##$")
	local first_word_self_gemination = rfind(text, "^##" .. stress_c .. "*[ʦʣʃʎɲ]")
	text = rsub(text, "([ʦʣʧʤ])(" .. charsep_c .. "*%.?)([ʦʣʧʤ]*)", function(affricate1, divider, affricate2)
		local full_affricate = full_affricates[affricate1]

		if affricate2 ~= "" then
			return usub(full_affricate, 1, 1) .. divider .. full_affricate
		end

		return full_affricate .. divider
	end)

	text = rsub(text, "g", "ɡ") -- U+0261 LATIN SMALL LETTER SCRIPT G

	local last_word_ends_in_primary_stressed_vowel = rfind(text, "ˈ##$")
	-- Last word is multisyllabic if it has a syllable marker in it. This should not happen across word boundaries
	-- (spaces) including ⁀, marking where two words were joined by syntactic gemination.
	local last_word_is_multisyllabic = rfind(text, "%.[^ ⁀]*$")
	local retval = {
		-- Automatic co-gemination (syntactic gemination of the following consonant in a multisyllabic word ending in
		-- a stressed vowel)
		auto_cogemination = last_word_ends_in_primary_stressed_vowel and last_word_is_multisyllabic,
		-- Last word ends in a vowel (an explicit * indicates co-gemination, i.e. syntactic gemination of the
		-- following consonant)
		last_word_ends_in_vowel = rfind(text, V .. stress_c .. "*" .. "##$"),
		-- Last word ends in a consonant (an explicit * indicates self-gemination of this consonant, i.e. the
		-- consonant doubles before a following vowel)
		last_word_ends_in_consonant = rfind(text, C .. "##$"),
		-- Last word ends in a consonant (ts dz ʃ ʎ ɲ) that triggers self-gemination before a following vowel
		auto_final_self_gemination = last_word_self_gemination,
		-- First word begins in a consonant (ts dz ʃ ʎ ɲ) that triggers self-gemination after a preceding vowel
		auto_initial_self_gemination = first_word_self_gemination,
	}

	-- Now that ⁀ has served its purpose, convert to a regular tie ‿.
	text = rsub(text, "⁀", "‿")
	
	-- Stress marks.
	-- Move IPA stress marks to the beginning of the syllable.
	text = rsub_repeatedly(text, "([#.])([^#.]*)(" .. stress_c .. ")", "%1%3%2")
	-- Suppress syllable mark before IPA stress indicator.
	text = rsub(text, "%.(" .. stress_c .. ")", "%1")
	-- Make all primary stresses but the last one in a given word be secondary. May be fed by the first rule above.
	text = rsub_repeatedly(text, "ˈ([^ #]+)ˈ", "ˌ%1ˈ")

	-- Remove # symbols at word/text boundaries and recompose.
	text = rsub(text, "#", "")
	text = mw.ustring.toNFC(text)

	retval.pron = text
	return retval
end

-- For bot usage; {{#invoke:it-pronunciation|to_phonemic_bot|SPELLING}}
function export.to_phonemic_bot(frame)
	local iparams = {
		[1] = {},
	}
	local iargs = require("Module:parameters").process(frame.args, iparams)
	return export.to_phonemic(iargs[1], mw.title.getCurrentTitle().text).pron
end

-- Incomplete and currently not used by any templates.
function export.to_phonetic(word, pagename)
	local phonetic = export.to_phonemic(word, pagename).pron

	-- Vowels longer in stressed, open, non-word-final syllables.
	phonetic = rsub(phonetic, "(ˈ" .. NV .. "*" .. V .. ")([" .. vowel .. "%.])", "%1ː%2")

	-- /n/ before /ɡ/ or /k/ is [ŋ]
	phonetic = rsub(phonetic, "n([%.ˈ]?[ɡk])", "ŋ%1") -- WARNING: IPA /ɡ/

	-- Imperfect: doesn't convert geminated k, g properly.
	phonetic = rsub(phonetic, "([kg])(" .. front_c .. ")", "%1" .. FRONTED .. "%2")
		:gsub("a", "ä")
		:gsub("n", "n̺") -- Converts n before a consonant, which is incorrect.

	return phonetic
end

function export.show(frame)
	local m_IPA = require("Module:IPA")

	local args = require("Module:parameters").process(
		frame:getParent().args,
		{
			-- terms to transcribe
			[1] = { list = true },
			["qual"] = { list = true, allow_holes = true },
			["ref"] = { list = true, allow_holes = true },
			pagename = {}, -- for testing
		})

	local final_cogemination = "triggers final cogemination (syntactic gemination of the initial consonant of the following word)"
	local final_non_cogemination = "does not trigger final cogemination (syntactic gemination of the initial consonant of the following word)"
	local final_self_cogemination = "triggers final self-gemination (syntactic gemination of the final consonant before a vowel)"
	local final_non_self_cogemination = "does not trigger final self-gemination (syntactic gemination of the final consonant before a vowel)"
	local initial_self_gemination = "triggers initial self-gemination (syntactic gemination of the initial consonant following a vowel)"
	local initial_non_cogemination = "blocks initial cogemination (syntactic gemination of the initial consonant when it would normally occur, i.e. following a stressed final vowel)"
	local initial_symbol_specs = {
		{"**", "optionally " .. initial_self_gemination},
		{"*", initial_self_gemination},
		{"°°", "optionally " .. initial_non_cogemination},
		{"°", initial_non_cogemination},
	}
	local final_vowel_symbol_specs = {
		{"**", "optionally " .. final_cogemination},
		{"*", final_cogemination},
		{"°", final_non_cogemination},
	}
	local final_consonant_symbol_specs = {
		{"**", "optionally " .. final_self_cogemination},
		{"*", final_self_cogemination},
		{"°", final_non_self_cogemination},
	}

	local pagename = args.pagename or mw.title.getCurrentTitle().text
	local respellings = args[1]
	if #respellings == 0 then
		respellings = {pagename}
	end
	local Array = require "Module:array"

	local transcriptions = Array(respellings):map(function(respelling, i)
		local qualifiers = {args.qual[i]}
		local prespec, actual_respelling, postspec = rmatch(respelling, "^([#!*°]*)(.-)([*°]*)$")
		respelling = actual_respelling
		if prespec:find("!!") then
			table.insert(qualifiers, 1, "elevated style")
			prespec = prespec:gsub("!!", "")
		end
		if prespec:find("!") then
			table.insert(qualifiers, 1, "careful style")
			prespec = prespec:gsub("!", "")
		end
		if prespec:find("#") then
			table.insert(qualifiers, 1, "traditional")
			prespec = prespec:gsub("#", "")
		end
		local phonemic = export.to_phonemic(respelling, pagename)
		local pretext, posttext

		if prespec == "" and phonemic.auto_initial_self_gemination then
			prespec = "*"
		end
		if postspec == "" and (phonemic.auto_cogemination or phonemic.auto_final_self_gemination) then
			postspec = "*"
		end

		local function check_symbol_spec(spec, recognized_specs, is_pre)
			for _, symbol_spec in ipairs(recognized_specs) do
				local symbol, text = unpack(symbol_spec)
				if symbol == spec then
					local abbr = '<abbr title="' .. text .. '"><sup>' .. symbol .. "</sup></abbr>"
					if is_pre then
						pretext = abbr
					else
						posttext = abbr
					end
					return
				end
			end
			error("Unrecognized " .. (is_pre and "initial" or "final") .. " symbol " .. spec)
		end

		if prespec ~= "" then
			check_symbol_spec(prespec, initial_symbol_specs, true)
		end
		if postspec ~= "" then
			if phonemic.last_word_ends_in_vowel then
				check_symbol_spec(postspec, final_vowel_symbol_specs, false)
			elseif phonemic.last_word_ends_in_consonant then
				check_symbol_spec(postspec, final_consonant_symbol_specs, false)
			else
				error("Last word ends in neither vowel nor consonant; final symbol " .. spec .. " not allowed here")
			end
		end
		local refs = args.ref[i] and require("Module:references").parse_references(args.ref[i]) or nil
		return {
			pron = "/" .. phonemic.pron .. "/",
			qualifiers = #qualifiers > 0 and qualifiers or nil,
			pretext = pretext,
			posttext = posttext,
			refs = refs,
		}
	end)

	return m_IPA.format_IPA_full(lang, transcriptions)
end

return export