Module:tl-utilities

From Wiktionary, the free dictionary
Jump to navigation Jump to search


local export = {}

local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local baybayin_encode_module = "Module:tl-bay_sc"

local lang = require("Module:languages").getByCode("tl")
local sc_Tglg = require("Module:scripts").getByCode("Tglg")

local rfind = m_str_utils.find
local rmatch = m_str_utils.match
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local u = m_str_utils.char
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local uupper = m_str_utils.upper

local AC = u(0x0301) -- acute =  ́
local GR = u(0x0300) -- grave =  ̀
local CFLEX = u(0x0302) -- circumflex =  ̂
local TILDE = u(0x0303) -- tilde =  ̃
local DIA = u(0x0308) -- diaeresis =  ̈
local MACRON = u(0x0304) -- macron =  ̄
local DOTOVER = u(0x0307) -- dot over =  ̇

local vowel = "aeëəiou" -- vowel
local V = "[" .. vowel .. "]"
local NV = "[^" .. vowel .. "]"
local accent = AC .. GR .. CFLEX .. MACRON
local accent_c = "[" .. accent .. "]"
local ipa_stress = "ˈˌ"
local ipa_stress_c = "[" .. ipa_stress .. "]"
local separator = accent .. ipa_stress .. "# .-"
local C = "[^" .. vowel .. separator .. "]" -- consonant

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

-- apply rsub() repeatedly until no change
local function rsub_repeatedly(term, foo, bar)
	while true do
		local new_term = rsub(term, foo, bar)
		if new_term == term then
			return term
		end
		term = new_term
	end
end

local function decompose(text, recompose_e_dia)
	-- decompose everything but ñ and ü
	text = toNFD(text)
	text = rsub(text, ".[" .. TILDE .. DIA .. "]", {
		["n" .. TILDE] = "ñ",
		["N" .. TILDE] = "Ñ",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	})
	if recompose_e_dia then
		text = rsub(text, ".[" .. DIA .. "]", {
			["e" .. DIA] = "ë",
			["E" .. DIA] = "Ë",
		})
	end
	return text
end

-- Fix capitalization but considers syllable breaks
local function fix_capitalization(input, caps_map)
	local syllbreak = 0
	local text = ulower(input)
	local syllbreak_chars = ".7"
	
	for i=1, #text do
		local text_pre = text:sub(1, i-1)
		local text_current = text:sub(i,i)
		local text_post = text:sub(i+1)
		local caps_current = caps_map:sub(i-syllbreak, i-syllbreak)
		if rfind(text_current, "[|" .. syllbreak_chars .. "]") and not rfind(caps_current, "[" .. syllbreak_chars .. "]")then
			syllbreak = syllbreak + 1
		elseif uupper(text_current) == caps_current then
			text = table.concat({text_pre, uupper(text_current), text_post})
		end
	end
	return text
end


function export.remove_accents(str)
	str = decompose(str, "recompose e-dia")
	str = rsub(str, "(.)" .. accent_c, "%1")
	return str
end

--Cleanup Baybayin inputs--
function export.decode_baybayin(text)
	local text = rsub(text, "[ᜀ-ᜟ᜵᜶]+", function(baybayin)
		result = lang:transliterate(baybayin, sc_Tglg)
		result = rsub(result, "([aeiou])([aeiou])", "%1-%2")
		result = rsub(result, "%-", "7")
		result = rsub(result, "([aeiou])", "%1" .. MACRON) -- No way to know stress in Baybayin. Disable for now.
		return result
	end)
	return text
end

-- "Align" syllabified respelling `syllab` to original spelling `spelling` by matching character-by-character, allowing
-- for extra syllable and accent markers in the syllabification and certain mismatches in the consonants. The goal is to
-- produce the appropriately syllabified version of the original spelling (the pagename) by matching characters in the
-- syllabified respelling to the original spelling, putting the syllable boundaries in the appropriate places in the
-- original spelling. As an example, given syllabified respelling 'a.ma.7ín' and original spelling 'amain', we would
-- like to produce 'a.ma.in'.
--
-- If we encounter an extra syllable marker (.), we allow and keep it. If we encounter an extra accent marker in thes
-- syllabification, we drop it. We allow for mismatches in capitalization and for certain other mismatches, e.g. extra
-- glottal stops (written 7), h in respelling vs. g or j in the original, etc. If we can't match, we return nil
-- indicating the alignment failed.
function export.align_syllabification_to_spelling(syllab, spelling)
	local result = {}
	local function concat_result()
		-- Postprocess to remove dots (syllable boundaries) next to hyphens.
		return (toNFC(table.concat(result)):gsub("%.%-", "-"):gsub("%-%.", "-"))
	end
	-- Remove glottal stop (7) from respelling to simplify the code below, because it's never found in the original
	-- spelling. (FIXME: We should do the same for diacritics, but they're currently removed earlier, in
	-- syllabify_from_spelling(). We should probably get rid of the removal there and put it here.)
	syllab = decompose(syllab:gsub("ː", ""), "recompose e-dia"):gsub("7", "")
	spelling = decompose(spelling, "recompose e-dia")
	local syll_chars = rsplit(ulower(syllab), "")
	local spelling_chars = rsplit(spelling, "")
	local i = 1
	local j = 1
	local function matches(uci, ucj)
		-- Return true if a syllabified respelling character (uci) matches the corresponding spelling char (ucj).
		-- Both uci and ucj should be lowercase.
		-- Sound is at the key, values are the letters sound can match
		local matching_chars = {
			["e"] = {"i"},
			["ë"] = {"a", "e", "o", "u"},
			["h"] = {"g", "j", "x"},
			["i"] = {"e"},
			["j"] = {"g"},
			["k"] = {"j"},
			["o"] = {"u"},
			["s"] = {"j", "c", "x"},
			["u"] = {"o"},
			["w"] = {"u", "o"},
			["y"] = {"i"}
		}

		return uci == ucj or (matching_chars[uci] and m_table.contains(matching_chars[uci], ucj))
	end
	local function silent_spelling_letter(ucj)
		return ucj == "h" or ucj == "'" or ucj == "-"
	end
	local function syll_at(pos)
		return syll_chars[pos] or ""
	end
	local function spell_at(pos)
		return spelling_chars[pos] or ""
	end
	local function uspell_at(pos)
		local c = spelling_chars[pos]
		return c and ulower(c) or ""
	end
	while i <= #syll_chars or j <= #spelling_chars do
		local uci = syll_at(i)
		local cj = spell_at(j)
		local ucj = uspell_at(j)

		if uci == "g" and syll_at(i - 1) == "n" and syll_at(i + 1) == "." and matches(syll_at(i + 2), ucj) and
			not matches(syll_at(i + 2), uspell_at(j + 1)) then
			-- As a special case, before checking whether the corresponding characters match, we have to skip an extra
			-- g in an -ng- sequence in the syllabified respelling if the corresponding spelling character matches the
			-- next respelling character (taking into account the syllable boundary). This is so that e.g.
			-- syll='ba.rang.gay' matches spelling='barangay'. Otherwise we will match the first respelling g against
			-- the spelling g and the second respelling g won't match. A similar case occurs with
			-- syll='E.vang.he.lis.ta' and spelling='Evangelista'. But we need an extra condition to not do this hack
			-- when syll='ba.rang.gay' matches spelling='baranggay'.
			i = i + 1
		elseif uci == "g" and ucj == "g" and uspell_at(j + 1) == TILDE  then
			table.insert(result, cj)
			table.insert(result, uspell_at(j + 1))
			i = i + 1
			j = j + 2
		elseif matches(uci, ucj) then
			table.insert(result, cj)
			i = i + 1
			j = j + 1
		elseif ucj == uspell_at(j - 1) and uci == "." and ucj ~= syll_at(i + 1) then
			-- See below. We want to allow for a doubled letter in spelling that is pronounced single, and preserve the
			-- doubled letter. But it's tricky in the presence of syllable boundaries on both sides of the doubled
			-- letter as well as doubled letters pronounced double. Specifically, there are three possibilities,
			-- exemplified by:
			-- (1) syll='Mal.lig', spelling='Mallig' -> 'Mal.lig';
			-- (2) syll='Ma.lig', spelling='Mallig' -> 'Ma.llig';
			-- (3) syll='Wil.iam', spelling='William' -> 'Will.iam'.
			-- If we copy the dot first, we get (1) and (2) right but not (3).
			-- If we copy the double letter first, we get (2) and (3) right but not (1).
			-- We choose to copy the dot first except in the situation exemplified by (3), where we copy the doubled
			-- letter first. The condition above handles (3) (the doubled letter matches against a dot) while not
			-- interfering with (1) (where the doubled letter also matches against a dot but the next letter in the
			-- syllabification is the same as the doubled letter, because the doubled letter is pronounced double).
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) and uci == "." and ucj ~= syll_at(i + 1) and
			not rfind(uspell_at(j + 1), V) then
			-- See below for silent h or apostrophe in spelling. This condition is parallel to the one directly above
			-- for silent doubled letters in spelling and handles the case of syllab='Abduramán', spelling='Abdurahman',
			-- which should be syllabified 'Ab.du.rah.man'. But we need a check to see that the next spelling character
			-- isn't a vowel, because in that case we want the silent letter to go after the period, e.g.
			-- syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah' (the 7 is removed above).
			table.insert(result, cj)
			j = j + 1
		elseif uci == "." then
			table.insert(result, uci)
			i = i + 1
		elseif ucj == uspell_at(j - 1) then
			-- A doubled letter in spelling that is pronounced single. Examples:
			-- * syllab='Ma.líg', spelling='Mallig' -> 'Ma.llig' (with l)
			-- * syllab='Lu.il.yér', spelling='Lhuillier' -> 'Lhu.ill.ier' (with l; a more complex example)
			-- * syllab='a.sa.la.mu a.lai.kum', spelling='assalamu alaikum' -> 'as.sa.la.mu a.lai.kum' (with s)
			-- * syllab='Jé.fer.son', spelling='Jefferson' -> 'Je.ffer.son' (with f)
			-- * syllab='Je.ma', spelling='Gemma' -> 'Ge.mma' (with m)
			-- * syllab='Ha.na', spelling='Hannah' -> 'Ha.nnah' (with n)
			-- * syllab='A.by', spelling='Abby' -> 'A.bby' (with b)
			-- * syllab='Ka.ba', spelling='Kaaba' -> 'Kaa.ba' (with a)
			-- * syllab='Fu.ji', spelling='Fujii' -> 'Fu.jii' (with i)
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) then
			-- A silent h, apostrophe or hyphen in spelling. Examples:
			-- * syllab='adán', spelling='adhan' -> 'a.dhan'
			-- * syllab='Atanasya', spelling='Athanasia' -> 'A.tha.nas.ia'
			-- * syllab='Cýntiya', spelling='Cynthia' -> 'Cyn.thi.a'
			-- * syllab='Ermóhenes', spelling='Hermogenes' -> 'Her.mo.ge.nes'
			-- * syllab='Abduramán', spelling='Abdurahman' -> 'Ab.du.rah.man'
			-- * syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah'
			-- * syllab='pag7ibig', spelling='pag-ibig' -> 'pag-i.big'
			table.insert(result, cj)
			j = j + 1
		elseif uci == AC or uci == GR or uci == CFLEX or uci == DIA or uci == TILDE or uci == MACRON or
			uci == "y" or uci == "w" then
			-- skip character
			i = i + 1
		else
			-- non-matching character
			mw.log(("Syllabification alignment mismatch for pagename '%s' (position %s, character %s), syllabified respelling '%s' (position %s, character %s), aligned result so far '%s'"
				):format(spelling, j, ucj, syllab, i, uci, concat_result()))
			return nil
		end
	end
	if i <= #syll_chars or j <= #spelling_chars then
		-- left-over characters on one side or the other
		mw.log(("Syllabification alignment mismatch for pagename '%s' (%s), syllabified respelling '%s' (%s), aligned result so far '%s'"
			):format(
				spelling, j > #spelling_chars and "end of string" or ("position %s, character %s"):format(j, uspell_at(j)),
				syllab, i > #syll_chars and "end of string" or ("position %s, character %s"):format(i, syll_at(i)),
				concat_result()))
		return nil
	end
	return concat_result()
end

function export.has_baybayin(text)
	return text:match("[ᜀ-ᜟ]")
end


function export.syllabify_from_spelling(text, pagename)
	-- Auto syllabifications start --
	local vowel = vowel .. "ẃý" -- vowel
	local V = "[" .. vowel .. "]"
	local NV = "[^" .. vowel .. "]"
	local C = "[^" .. vowel .. separator .."]" -- consonant
	
	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = trim(text)
	text = canon_spaces(text)
	text = rsub(text, "[ᜀ-ᜟ]+", function(baybayin)
		return "<᜶" .. export.decode_baybayin(baybayin) .. "᜶>"
	end)
	
	text = decompose(text, "recompose e-dia")

	local origtext = text
	text = string.lower(text)

	text = rsub(text, "[.] ", "․ ")
	text = rsub(text, "[.]$", "․")

	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "#")

	text = rsub(text, "ng̃", "ŋ")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "g̃", "ġ")
	text = rsub(text, "ch", "ĉ")
	text = rsub(text, "t_s", "ć")
	text = rsub(text, "sh", "ʃ")
	text = rsub(text, "gu([eëiy])", "ǵ%1")
	text = rsub(text, "qu([eëiy])", "ḱ%1")
	text = rsub(text, "r", "ɾ")
	text = rsub(text, "ɾɾ", "r")
	text = rsub(text, "ʔ", "7")

	text = rsub_repeatedly(text, "#(" .. C .. "+)u([aeio])","#%1u.%2")
	text = rsub_repeatedly(text, "#(" .. C .. "+)i([aeou])","#%1i.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")u([aeio])","#%1.u%2")
	text = rsub_repeatedly(text, "(" .. C .. ")i([aeou])","#%1.i%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)u([aeio])","%1.u%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)o([aei])","%1.ó%2")
	text = rsub(text, "a(" .. accent_c .. "*)o([#.7])","a%1ó%2")

	-- eu rules
	text = rsub_repeatedly(text, "([^" .. vowel .. "#])([e])("  .. accent_c .. "?)([u])("  .. accent_c .. "?)","%1%2%3.%4%5")

	text = rsub(text, "y([ˈˌ." .. accent .. "]*)([bćĉdfgǵhjĵkḱlmnɲŋpɾrsʃtvwɟzʔ#" .. vowel .. "])","ý%1%2")
	text = rsub(text, "ý(" .. V .. ")", "y%1")
	text = rsub(text, "w([ˈˌ]?)([bćĉdfgǵjĵkḱlmnɲŋpɾrsʃtvwɟzʔ#" .. vowel .. "])","ẃ%1%2")
	text = rsub(text, "ẃ(" .. V .. ")","w%1")

	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ([bdfgǵkḱpt])([ɾr" .. vowel .. separator .."])" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ([bfgǵkḱp])([l" .. vowel .. separator .."])" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý([bdfgǵkḱpt])([ɾr" .. vowel .. separator .."])" ,"%1%2y%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý([bfgǵkḱp])([l" .. vowel .. separator .."])" ,"%1%2y%3%4")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")

	-- "mb", "mp", "nd", "nk", "nt" combinations
	text = rsub_repeatedly(text, "(m)([bp])([^lɾrɟy" .. vowel .. separator .."])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([dk])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([s])([^ɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([t])([^lɾrɟys" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(ŋ)([k])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([bćĉdfgǵkḱlmnpsʃvz])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([t])([sz]?)([^lɾrɟysʃ" .. vowel .. separator .. "])(" .. V .. ")", "%1%2%3.%4%5")
	text = rsub_repeatedly(text, "(s)([ktp])([^lɾrwɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")

	-- Any aeëo, or stressed iu, should be syllabically divided from a following aeëo or stressed iu.
	text = rsub_repeatedly(text, "([aeëo]" .. accent_c .. "*)([aeëo])", "%1.%2")
	text = rsub_repeatedly(text, "([aeëo]" .. accent_c .. "*)(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub(text, "([iu]" .. accent_c .. ")([aeëo])", "%1.%2")
	text = rsub_repeatedly(text, "([iu]" .. accent_c .. ")(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)u", "u%1.u")

	text = rsub(text, "ĉ", "ch")
	text = rsub(text, "ć", "ts")
	text = rsub(text, "ŋ", "ng")
	text = rsub(text, "ʃ", "sh")
	text = rsub(text, "ǵ.([ei])", "g.u%1")
	text = rsub(text, "ǵ", "gu")
	text = rsub(text, "ġ", "g̃")
	text = rsub(text, "ḱ", "qu")
	text = rsub(text, "r", "rr")
	text = rsub(text, "ɾ", "r")

	text = rsub_repeatedly(text, "([.]+)", ".")
	text = rsub(text, "[.]?-[.]?", "-")
	text = rsub(text, "[‿]([^ ])", "|%1")
	text = rsub(text, "[.]([^ ])", "|%1")

	text = rsub(text, "([|])+", "%1")

	-- remove # symbols at word and text boundaries
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "")
	text = rsub(text, "․", ".")
	
	text = rsub(text, "ẃ", "w")
	text = rsub(text, "ý", "y")

	-- Fix Capitalization --
	text = fix_capitalization(text, origtext)

	-- Fix hyphens --
	-- FIXME!!! Why are we relying on looking at the pagename here? This should not be happening.
	origtext = pagename

	if (table.concat(rsplit(origtext, "-")) == table.concat(rsplit(table.concat(rsplit(text, "|")), "-"))) then
		syllbreak = 0
		for i=1, #text do
			if text:sub(i,i) == "|" then
				if origtext:sub(i-syllbreak, i-syllbreak) == "-" then
					text = table.concat({text:sub(1, i-1), "-", text:sub(i+1)})
				else
					syllbreak = syllbreak + 1
				end
			end
		end
	end
	
	-- Reencode Baybayin
	text = rsub(text, "[<][᜶]([^᜶]+)[᜶][>]", function(baybayin)
		baybayin = baybayin:gsub("|", "/"):gsub("7", "")
		local result = require(baybayin_encode_module).transcribe(baybayin:gsub("|", "/"), false, false, false) 
		result = rsub(result, " ᜵ ", "|")
		return result
	end)

	-- FIXME! Hack -- up above we changed periods to vertical bars. The rest of the code expects periods so change
	-- them back. We should clean up the code above to leave the periods alone.
	return (text:gsub("|", "%."))
end

function export.syllabify_and_align(respelling, pagename)
	if pagename == nil then
		pagename = respelling
	end
	local syllabification = export.syllabify_from_spelling(respelling, pagename)
	return export.align_syllabification_to_spelling(syllabification, pagename)
end

local function nasal_adjust(text1, text2, assimilation)
	local t1 = text1 
	t1 = rsub(t1, "ng([- ]*)$", "ŋ%1")
	t1 = rsub(t1, "m([- ]*)$", "ṃ")
	t1 = rsub(t1, "n([- ]*)$", "ṇ")
	
	local result = t1 .. text2
	if assimilation == "partial" then
		result = rsub(result, "[ŋṇ]([- ]*)([bp])", "m%1%2")
		result = rsub(result, "[ŋ]([- ]*)([dlnst])", "n%1%2")
		result = rsub(result, "[ṇ]([- ]*)([kgʔ])", "ŋ%1%2")
	elseif assimilation == "total" then
		result = rsub(result, "[ŋṇṃ][- ]*([bp])(" .. V .. ")%1%2(" .. NV .. ")(" .. V .. ")", "m%2m%2%3%4")
		result = rsub(result, "[ŋṇ][- ]*([dnst])(" .. V .. ")%1%2(" .. NV .. ")(" .. V .. ")", "n%2n%2%3%4")
		result = rsub(result, "[ŋṇ][- ]*([kgʔ])(" .. V .. ")%1%2(" .. NV .. ")(" .. V .. ")", "ŋ%2ŋ%2%3%4")
		result = rsub(result, "[ŋṇṃ][- ]*([bp])", "m")
		result = rsub(result, "[ŋṇ][- ]*([dnst])", "n")
		result = rsub(result, "[ŋṇ][- ]*([kgʔ])", "ŋ")
		result = rsub(result, "[ŋṇ]([- ]*)([l])", "n%1%2")
	end
	result = rsub(result, "ŋ", "ng")
	result = rsub(result, "ṃ", "m")
	result = rsub(result, "ṇ", "n")
	return result
end

local function add_prefix(root, prefix, assimilation, add_hyphen)
	local hyphen = ''
	local result = root
	
	local root_vowel_start = rfind(ulower(result), "^(" .. V .. ")")
	local affix_consonant_end = rmatch(prefix, C .. "$")
	if root_vowel_start then
		result = "ʔ" .. result
		result = rsub(result, "^ʔ(.)%1", "ʔ%1ʔ%1")
	end
	
	if add_hyphen or (root_vowel_start and affix_consonant_end) then
		hyphen = '-'
	end
	prefix = prefix .. hyphen
	result = nasal_adjust(prefix, result, assimilation)

	result = rsub(result, "[-]+", "-")
	return result
end

-- TODO
-- Prefix -- DONE
-- Infix
-- Suffix + changing spellings
-- Circumfix
-- By word affixation
-- Hyphen addition
-- Nasal assimilation
-- Syllabify
-- Reduplication
-- Pronunciation doesn't match spelling of root, provide phonetic spellings
-- D/R change
-- double o or uo
-- Metathesis (nl, w, y)
-- Diacritics (optional)
-- Syncope
-- Baybayin?
function export.add_affix(root, affix, assimilation, wordct, add_hyphen, metathesis, syllabify)
	local affix_type = ""
	local new_affix = affix
	local has_beginning_hyphen = rfind(affix, "^-")
	local has_ending_hyphen = rfind(affix, "-$")
	
	if has_beginning_hyphen and has_ending_hyphen then
		affix_type = "infix"
	elseif has_beginning_hyphen then
		affix_type = "suffix"
	elseif has_ending_hyphen then
		affix_type = "prefix"
	end
	
	if has_beginning_hyphen then
		new_affix = rsub(new_affix, "^-", "")
	end
	
	if has_ending_hyphen then
		new_affix = rsub(new_affix, "-$", "")
	end
	
	local word_idx = 1
	local wordct = wordct
	if wordct == nil then
		wordct = 0
	end
	
	local words = rsplit(root, " ")
	for i=1, #words do
		local hyph_words = rsplit(words[i], "-")
		for j=1, #hyph_words do
			local affix_action = nil
			if affix_type == "prefix" and 
			((wordct == 0 and word_idx == 1) or word_idx == wordct) then
				affix_action = add_prefix
			end
		
			if affix_action then
				hyph_words[j] = affix_action(hyph_words[j], new_affix, assimilation, add_hyphen)
			end
			hyph_words[j] = rsub(hyph_words[j], "ʔ", "")
			word_idx = word_idx + 1
		end
		words[i] = table.concat(hyph_words, '-')
	end

	words = table.concat(words, " ")
	return words
end

return export