Module:zh-usex: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
Justinrleung (talk | contribs) +nan pron_correction 咧 |
reduce size |
||
Line 141: | Line 141: | ||
["wuu"] = {}, |
["wuu"] = {}, |
||
} |
} |
||
local zh_format_start = "<span style=\"font-size:90%\"><span lang=\"zh\" class=\"Hani\">" |
|||
local zh_format_end = "</span></span>" |
|||
function export.show(frame) |
function export.show(frame) |
||
Line 151: | Line 154: | ||
local audio_file = args["a"] or args["audio"] or false |
local audio_file = args["a"] or args["audio"] or false |
||
local phonetic = "" |
local phonetic = "" |
||
local original_length = mw.ustring.len(gsub(example, "[^一-龯㐀-䶵]", "")) |
|||
local variety = args[3] or (ref_list[reference] and ref_list[reference][1] or false) or "MSC" |
local variety = args[3] or (ref_list[reference] and ref_list[reference][1] or false) or "MSC" |
||
variety_code = variety_list[variety][2] |
variety_code = variety_list[variety][2] |
||
Line 301: | Line 304: | ||
end |
end |
||
local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\"><span>[</span>" -- "[[[w:MSC|MSC]]" is interpreted poorly, hence the dummy <span>s |
local tag_start = " <small><span style=\"color:darkgreen; font-size:x-small;\"><span>[</span>" -- "[[[w:MSC|MSC]]" is interpreted poorly, hence the dummy <span>s |
||
local tag_end = "<span>]</span></span>" |
local tag_end = "<span>]</span></span></small>" |
||
if display == "ruby" then |
if display == "ruby" then |
||
Line 364: | Line 367: | ||
-- indentation, font and identity tags |
-- indentation, font and identity tags |
||
if original_length > 10 then |
|||
if match(example, "[,。?!、:; ]") then |
|||
trad_text = "<dd> |
trad_text = "<dd>" .. zh_format_start .. trad_text .. zh_format_end |
||
if phonetic then |
if phonetic then |
||
phonetic = "<dl><dd>" .. phonetic |
phonetic = "<dl><dd>" .. phonetic |
||
Line 382: | Line 385: | ||
if simp_exist then |
if simp_exist then |
||
simp_text = "<dd> |
simp_text = "<dd>" .. zh_format_start .. simp_text .. zh_format_end |
||
simp_tag = tag_start .. variety_list[variety][1] .. ", <i>[[w:Simplified Chinese|simp.]]</i>" .. tag_end .. "</dd>" |
simp_tag = tag_start .. variety_list[variety][1] .. ", <i>[[w:Simplified Chinese|simp.]]</i>" .. tag_end .. "</dd>" |
||
end |
end |
||
Line 394: | Line 397: | ||
else |
else |
||
trad_text = |
trad_text = zh_format_start .. trad_text .. zh_format_end |
||
divider = " ― " |
divider = " ― " |
||
Line 407: | Line 410: | ||
if simp_exist then |
if simp_exist then |
||
simp_text = " / |
simp_text = " / " .. zh_format_start .. simp_text .. zh_format_end |
||
end |
end |
||
Revision as of 12:39, 9 May 2016
- The following documentation is located at Module:zh-usex/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module does the work for {{zh-x}}; see that template for more information.
Data for this module is found in Module:zh-usex/data.
More (informal) testcases can be found in Template:zh-x/testcases. Put stuff that isn't working there!
-- Helper modules. In the code below, m_zh is used through m_zh.ts_determ,
-- m_zh.ts, m_zh.py and m_zh.check_pron; m_zh_data.jyutping is passed to gsub
-- as the replacement for each character of a Cantonese word.
local m_zh = require("Module:zh")
local m_zh_data = require("Module:zh/data")
-- Short aliases for the mw.ustring string functions.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- NOTE(review): `sub` is not referenced in the code visible here.
local sub = mw.ustring.sub
-- Title text of the current page; a PAGENAME global that is already defined
-- takes precedence over the lookup.
local PAGENAME = PAGENAME or mw.title.getCurrentTitle().text
-- Table of functions returned at the end of the module.
local export = {}
-- Varieties of Chinese, keyed by the code given as the third template argument.
-- Each entry is a three-element list:
--   [1] linked label shown in the green identity tag
--   [2] language code that selects the transcription logic
--   [3] label of the romanisation shown in the transcription tag
local variety_list = {
	MSC       = { "[[w:Standard Chinese|MSC]]", "cmn", "Pinyin" },
	["M-TJ"]  = { "[[w:Tianjin dialect|Tianjin Mandarin]]", "cmn", "Pinyin" },
	CL        = { "[[w:Classical Chinese|Classical Chinese]]", "cmn", "Pinyin" },

	C         = { "[[w:Cantonese|Cantonese]]", "yue", "Jyutping" },
	["C-GZ"]  = { "[[w:Cantonese|Guangzhou Cantonese]]", "yue", "Jyutping" },
	["C-LIT"] = { "[[w:Cantonese|Literary Cantonese]]", "yue", "Jyutping" },

	MD        = { "[[w:Min Dong|Min Dong]]", "cdo", "[[w:Bàng-uâ-cê|Bàng-uâ-cê]] / IPA" },
	MN        = { "[[w:Min Nan|Min Nan]]", "nan", "[[w:Pe̍h-ōe-jī|Pe̍h-ōe-jī]]" },
	TW        = { "[[w:Taiwanese|Taiwanese]]", "nan", "[[w:Pe̍h-ōe-jī|Pe̍h-ōe-jī]]" },
	["MN-T"]  = { "[[w:Teochew dialect|Teochew]]", "nan-teo", "[[w:zh:潮州話拼音方案|Peng'im]]" },

	W         = { "[[w:Wu Chinese|Wu]]", "wuu", "IPA" },
	SH        = { "[[w:Shanghainese|Shanghainese]]", "wuu", "IPA" },

	H         = { "[[w:Hakka Chinese|Hakka]]", "hak", "[[w:Pha̍k-fa-sṳ|Pha̍k-fa-sṳ]]" },
}
-- Maps a punctuation token found in the example text to the character(s)
-- emitted in its place in the transcription. A token that is a key of this
-- table is never linked and never treated as a word to transcribe.
local punctuation = {
	-- pauses and sentence enders
	[","] = ",",
	["。"] = ".",
	["、"] = ",",
	["?"] = "?",
	["!"] = "!",
	-- every kind of quote or title mark becomes a single curly quote
	["“"] = "‘",
	["”"] = "’",
	["‘"] = "‘",
	["’"] = "’",
	["《"] = "‘",
	["》"] = "’",
	["『"] = "‘",
	["』"] = "’",
	["「"] = "‘",
	["」"] = "’",
	-- brackets, clause separators and miscellany
	["("] = "(",
	[")"] = ")",
	[";"] = ";",
	[":"] = ":",
	["|"] = "|",
	["—"] = "-",
	["·"] = " ",
	["…"] = "...",
	[" "] = ";",
}
-- Well-known source texts, keyed by the value of the |ref= / |r= argument.
-- Each entry is { default variety code, citation wikitext shown after "From:" }.
local ref_list = {
	Analects  = { "CL", "The ''[[w:Analects|Analects]] of Confucius'', circa 475 – 221 BCE" },
	Hanfeizi  = { "CL", "''[[w:Han Feizi (book)|Han Feizi]]'', circa 2nd century BCE" },
	Hanshu    = { "CL", "The ''[[w:Book of Han|Book of Han]]'', circa 1st century CE" },
	Liji      = { "CL", "The ''[[w:Book of Rites|Book of Rites]]'', circa 4th – 2nd century BCE" },
	Mengzi    = { "CL", "''[[w:Mencius (book)|Mengzi]] (Mencius)'', circa 4th century BCE" },
	Mozi      = { "CL", "''[[w:Mozi|Mozi]] ([[w:zh:墨子 (书)|book]])'', circa 4th century BCE" },
	Shangshu  = { "CL", "The ''[[w:Book of Documents|Book of Documents]]'', circa 4th – 3rd century BCE" },
	Shiji     = { "CL", "The ''[[w:Records of the Grand Historian|Records of the Grand Historian]]'', by [[w:Sima Qian|Sima Qian]], circa 91 BCE" },
	Shijing   = { "CL", "The ''[[w:Classic of Poetry|Classic of Poetry]]'', circa 11th – 7th centuries BCE" },
	Shujing   = { "CL", "The ''[[w:Book of Documents|Book of Documents]]'', circa 7th – 4th centuries BCE" },
	Shuowen   = { "CL", "''[[w:Shuowen Jiezi|Shuowen Jiezi]]'', circa 2nd century CE" },
	Houhanshu = { "CL", "The ''[[w:Book of the Later Han|Book of the Later Han]]'', circa 5th century CE" },
	Yijing    = { "CL", "''[[w:I Ching|I Ching]]'', 3rd – 2nd millennia BCE" },
	Zhuangzi  = { "CL", "''[[w:Zhuangzi (book)|Zhuangzi]]'', circa 3rd – 2nd centuries BCE" },
}
-- Per-character reading overrides, grouped by language code. When a word is
-- transcribed automatically, each of its characters that is a key here is
-- replaced by the reading given, before the remaining characters are
-- romanised. The "nan" readings carry hyphens on both sides; surplus hyphens
-- are cleaned up later in the transcription step.
local pron_correction = {
	cmn = {
		["吧"] = "ba",
		["的"] = "de",
		["都"] = "dōu",
		["個"] = "ge",
		["給"] = "gěi",
		["更"] = "gèng",
		["還"] = "hái",
		["幾"] = "jǐ",
		["將"] = "jiāng",
		["了"] = "le",
		["沒"] = "méi",
		["漂"] = "piào",
		["什"] = "shén",
		["為"] = "wèi",
		["要"] = "yào",
	},

	yue = {
		["若"] = "joek6", ["來"] = "loi4", ["華"] = "waa4", ["蛇"] = "se4",
	},

	nan = {
		["人"] = "-lâng-",  ["共"] = "-kā-",    ["的"] = "-ê-",     ["講"] = "-kóng-",
		["予"] = "-hō͘-",    ["爸"] = "-pē-",    ["汝"] = "-lí-",    ["𨑨"] = "-chhit-",
		["迌"] = "-thô-",   ["𪜶"] = "-in-",    ["一"] = "-chi̍t-",  ["毋"] = "-m̄-",
		["欲"] = "-beh-",   ["到"] = "-kàu-",   ["閣"] = "-koh-",   ["佇"] = "-tī-",
		["佮"] = "-kah-",   ["仔"] = "-á-",     ["塊"] = "-tè-",    ["也"] = "-iā-",
		["攏"] = "-lóng-",  ["較"] = "-khah-",  ["阿"] = "-a-",     ["嬤"] = "-má-",
		["字"] = "-jī-",    ["多"] = "-to-",    ["士"] = "-sū-",    ["商"] = "-siong-",
		["斷"] = "-tn̄g-",   ["着"] = "-tio̍h-",  ["戴"] = "-tì-",    ["徛"] = "-khiā-",
		["跤"] = "-kha-",   ["誌"] = "-chì-",   ["啥"] = "-siáⁿ-",  ["行"] = "-kiâⁿ-",
		["甲"] = "-kah-",   ["雙"] = "-siang-", ["日"] = "-ji̍t-",   ["咧"] = "-teh-",
	},

	wuu = {},
}
-- Reading overrides for whole multi-character sequences, grouped by language
-- code. Every key is used as a gsub pattern against the word being
-- transcribed, after the single-character overrides have been applied.
local polysyllable_pron_correction = {
	cmn = {},
	yue = {},
	nan = {
		["親像"] = "-chhin-chhiūⁿ-", ["的確"] = "-tek-khak-",
		["歹勢"] = "-pháiⁿ-sè-", ["請假"] = "-chhéng-ká-",
	},
	wuu = {},
}
-- Opening and closing markup wrapped around every run of Chinese text in the
-- plain (non-ruby) layouts: a 90% font-size span around a lang="zh" Hani span.
local zh_format_start = '<span style="font-size:90%"><span lang="zh" class="Hani">'
local zh_format_end = '</span></span>'
--- Renders a Chinese usage example; entry point invoked by the template.
--
-- Arguments are read from the parent frame (the template call):
--   1                     example text (required)
--   2                     translation (required)
--   3                     variety code, a key of `variety_list`; when absent,
--                         the variety of a known reference is used, else "MSC"
--   ref / r               a key of `ref_list`, or free text shown after "From:"
--   tr                    manual transcription; disables the automatic one
--   type / display_type   "ruby" selects the ruby layout; any other value
--                         gives the plain layout
--   a / audio             file name of an audio recording
--   link / l              a value containing "n" switches off automatic links
--
-- Markup understood inside the example text (as handled by the code below):
--   '''...'''  bold; when no bold is present the page title is bolded
--   X[Y]       X is displayed in the traditional text, Y in the simplified
--              text and in the transcription
--   X{tr}      tr replaces X in the transcription and is dropped from display
--   A\B        displayed/linked as [[A|B]]; only B is transcribed
--   @          stops the word from being linked
--   -          splits a word into separately linked parts
--   ^ and .    dropped from display; "^" capitalises the next letter of the
--              transcription and "." becomes a space there
--
-- @param frame Scribunto frame object of the #invoke call
-- @return string of wikitext/HTML for the formatted example
-- Raises an error when argument 1 or 2 is missing or the variety is unknown.
function export.show(frame)
	local args = frame:getParent().args
	local example = args[1] or error("Example unspecified.")
	local translation = args[2] or error("Lacking translation.")
	local reference = args["ref"] or args["r"] or false
	local manual_tr = args["tr"] or false
	local display = args["type"] or args["display_type"] or "plain"
	local audio_file = args["a"] or args["audio"] or false
	local phonetic = ""
	-- number of Han characters in the raw example; decides the layout below
	local original_length = mw.ustring.len(gsub(example, "[^一-龯㐀-䶵]", ""))
	local variety = args[3] or (ref_list[reference] and ref_list[reference][1] or false) or "MSC"
	-- fail with a readable message instead of "attempt to index a nil value"
	if not variety_list[variety] then
		error("Unrecognised variety code: " .. tostring(variety))
	end
	local variety_code = variety_list[variety][2]
	local link = args["link"] or args["l"] or "yes"
	-- no links when asked for, nor when the example is an unspaced sentence
	link = match(link, "n") == nil and not (not match(example, " ") and match(example, "[,。?!﹑]"))

	-- automatically boldify pagetitle if nothing is in bold
	if not match(example, "'''") and not punctuation[PAGENAME] then
		-- the title is literal text: escape pattern magic characters and use a
		-- function replacement so that "%" in a title is not read as a capture
		local pagename_pattern = gsub(PAGENAME, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
		example = gsub(example, pagename_pattern, function(found)
			return "'''" .. found .. "'''"
		end)
		-- adjacent bold runs are merged into one
		example = gsub(example, "''''''", "")
	end

	-- tidying up the example, making it ready for transcription
	-- (punctuation is spaced out so that each mark becomes a token of its own)
	example = gsub(example, "([?!,。、“”…;:‘’|()「」—《》· ])", " %1 ")
	example = gsub(example, "^ *", "")
	example = gsub(example, " *$", "")
	example = gsub(example, " +", " ")
	example = gsub(example, "%'%'%'([^%']+)%'%'%'", "<b>%1</b>")
	-- keep a trailing [..] or {..} annotation attached to the bolded character
	example = gsub(example, "</b>(%[[^%[%]]+%])", "%1</b>")
	example = gsub(example, "</b>({[^{}]+})", "%1</b>")

	local ruby_start, ruby_mid, ruby_end = "<big><ruby><span class=\"Hani\">", "</span><rp> (</rp><rt><big>", "</big></rt><rp>)</rp></ruby></big>"
	local ruby_words = {}
	local trad_words, simp_words, tr_words = {}, {}, {}
	-- a separate simplified line is needed when the text is detected as
	-- traditional, or when it has a [..] annotation that is not of the X[X] form
	local simp_exist = m_zh.ts_determ(example) == "trad" or (match(example, "%[[^%[%]]+%]") and not match(example, "(.)%[%1%]"))
	-- Becomes true at the first automatically transcribed word and stays true
	-- for the rest of the example (same lifetime as the former global).
	-- NOTE(review): as a result, ruby text is also emitted for punctuation
	-- that follows the first word - confirm this is intended.
	local real_word

	for word in mw.text.gsplit(example, " ", true) do
		local trad_word, simp_word, tr_word, ruby_word = word, false, false, ""

		-- various tricks for linking and display in trad. and simp.
		trad_word = gsub(trad_word, "(.)%[(.)%]", "%1")
		trad_word = gsub(trad_word, "{[^{}]+}", "")
		trad_word = gsub(trad_word, "[%^%.]", "")
		trad_word = gsub(trad_word, "\\", "|")
		if simp_exist then
			-- presumably m_zh.ts converts the word to simplified script - verify
			-- against Module:zh
			simp_word = gsub(m_zh.ts(word), ".%[(.)%]", "%1")
			simp_word = gsub(simp_word, "%{[^%}]+%}", "")
			simp_word = gsub(simp_word, "[%^%.]", "")
			simp_word = gsub(simp_word, "\\", "|")
		end

		-- produce links
		-- a word that is exactly the page title is left unlinked
		local contain_pagename = (gsub(gsub(gsub(trad_word, "</?b>", ""), "%^", ""), "-", "") == PAGENAME) and not punctuation[PAGENAME]
		if match(trad_word, "|") or (link and not match(trad_word, "@") and not punctuation[word] and not contain_pagename) then
			if match(trad_word, "<b>.+</b>") then
				-- link each hyphen-separated part on its own, re-balancing the
				-- <b> tags when the bold run spans several parts
				trad_word = mw.text.split(trad_word, "-", true)
				for i,val in ipairs(trad_word) do
					if match(val, "<b>") and not match(val, "</b>") then
						trad_word[i] = "[[" .. gsub(val, "</?b>", "") .. "|" .. val .. "</b>]]"
					elseif match(val, "</b>") and not match(val, "<b>") then
						trad_word[i] = "[[" .. gsub(val, "</?b>", "") .. "|<b>" .. val .. "]]"
					else
						trad_word[i] = "[[" .. gsub(val, "</?b>", "") .. "|" .. val .. "]]"
					end
				end
				trad_word = table.concat(trad_word)
			else
				trad_word = "[[" .. trad_word .. "]]"
				trad_word = gsub(trad_word, "%-", "]][[")
			end
			if simp_exist then
				if match(simp_word, "<b>.+</b>") then
					simp_word = mw.text.split(simp_word, "-", true)
					for i,val in ipairs(simp_word) do
						if match(val, "<b>") and not match(val, "</b>") then
							simp_word[i] = "[[" .. gsub(val, "</?b>", "") .. "|" .. val .. "</b>]]"
						elseif match(val, "</b>") and not match(val, "<b>") then
							simp_word[i] = "[[" .. gsub(val, "</?b>", "") .. "|<b>" .. val .. "]]"
						else
							simp_word[i] = "[[" .. gsub(val, "</?b>", "") .. "|" .. val .. "]]"
						end
					end
					simp_word = table.concat(simp_word)
				else
					simp_word = "[[" .. simp_word .. "]]"
					simp_word = gsub(simp_word, "%-", "]][[")
				end
			end
		end
		trad_word = gsub(trad_word, "@", "")
		simp_word = simp_exist and gsub(simp_word, "@", "")

		-- same tricks applied to transcription
		if not manual_tr and (variety_code == "cmn" or variety_code == "yue" or variety_code == "nan") then
			if punctuation[word] then
				tr_word = punctuation[word]
			else
				real_word = true
				tr_word = gsub(word, "@", "")
				tr_word = gsub(tr_word, "%.", " ")
				tr_word = gsub(tr_word, ".+\\", "")
				tr_word = gsub(tr_word, ".%[([^%[%]]+)%]", "%1")
				tr_word = gsub(tr_word, ".</b>(%{[^%}]+%})", "%1</b>")
				tr_word = gsub(tr_word, ".%{([^%}]+)%}", "%1")
				-- single-character overrides first, then multi-character ones
				tr_word = gsub(tr_word, ".", pron_correction[variety_code])
				for key,val in pairs(polysyllable_pron_correction[variety_code]) do
					tr_word = gsub(tr_word, key, val)
				end
				if variety_code == "cmn" then
					tr_word = gsub(tr_word, "%-", "")
					tr_word = m_zh.py(tr_word)
				elseif variety_code == "yue" then
					tr_word = gsub(tr_word, ".", m_zh_data.jyutping)
					-- a space after every syllable (letter + tone digit[s])
					tr_word = gsub(tr_word, "([a-z])([1-9])(-?)([1-9]?)", "%1%2%3%4 ")
				elseif variety_code == "nan" then
					-- look up each run of characters as a whole; when that
					-- fails, fall back to one character at a time, joined by "-"
					tr_word = gsub(tr_word, "[一-鿌㐀-䶵 -〿𠀀-]+", function(text)
						if m_zh.check_pron(text, 'nan', 1) then
							return gsub(m_zh.check_pron(text, 'nan', 1), "/.+$", "")
						else
							text = gsub(text, ".", function(ch)
								if m_zh.check_pron(ch, 'nan', 1) then
									return gsub(m_zh.check_pron(ch, 'nan', 1), "/.+$", "") .. "-"
								else
									return ch
								end
							end)
							return gsub(text, "-$", "")
						end
					end)
					-- collapse and trim the hyphens left over from the joins
					tr_word = gsub(tr_word, "%-+", "-")
					tr_word = gsub(tr_word, "%-+([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌ])", "%1")
					tr_word = gsub(tr_word, "([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōoóòôōA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌOÓÒÔŌ̄͘])%-+", "%1")
					tr_word = gsub(tr_word, "<b>", "-<b>")
					tr_word = gsub(tr_word, "</b>", "</b>-")
					tr_word = gsub(tr_word, "^%-+", "")
					tr_word = gsub(tr_word, "%-+$", "")
					-- "%" in the example becomes a double hyphen
					tr_word = gsub(tr_word, "%%", "--")
				end
			end
		end
		if variety_code == "nan" then
			trad_word = gsub(trad_word, "%%", "")
			simp_word = simp_exist and gsub(simp_word, "%%", "")
		end

		if display == "ruby" then
			ruby_word = ruby_start .. trad_word .. (simp_exist and "<br>" .. simp_word or "") .. ruby_mid .. (real_word and tr_word or "") .. ruby_end
			table.insert(ruby_words, ruby_word)
		else
			table.insert(trad_words, trad_word)
			-- a false simp_word / tr_word is turned into nil, so nothing is added
			table.insert(simp_words, simp_word or nil)
			table.insert(tr_words, tr_word or nil)
		end
	end

	local tag_start = " <small><span style=\"color:darkgreen; font-size:x-small;\"><span>[</span>" -- "[[[w:MSC|MSC]]" is interpreted poorly, hence the dummy <span>s
	local tag_end = "<span>]</span></span></small>"
	-- output pieces; declared here so that they no longer leak into globals
	local trad_text, simp_text
	local trad_tag, simp_tag, ts_tag, tr_tag, audio, divider

	if display == "ruby" then
		local tag = " <ruby><rb><big>" ..
			tag_start .. variety_list[variety][1] ..
			(simp_exist
				and ", ''[[Traditional Chinese|trad.]]''↑ + ''[[Simplified Chinese|simp.]]''↓"
				or ", ''[[Traditional Chinese|trad.]]'' and ''[[Simplified Chinese|simp.]]''") .. tag_end ..
			tag_start .. "''rom.'': " .. variety_list[variety][3] .. tag_end ..
			"</rb></big></ruby>"
		return table.concat(ruby_words, "") .. tag .. "<dl><dd><i>" .. translation .. "</i></dd></dl>"
	else
		trad_text = table.concat(trad_words)
		simp_text = simp_exist and table.concat(simp_words) or false
		phonetic = manual_tr or (#tr_words > 0 and table.concat(tr_words, " ") or false)

		-- overall transcription formatting
		if phonetic then
			phonetic = gsub(phonetic, " </b>", "</b> ")
			phonetic = gsub(phonetic, " ", " ")
			if variety_code == "yue" then
				phonetic = gsub(phonetic, "([a-z]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript jyutping tones
			end
			phonetic = gsub(phonetic, " ([,%.?!;:’)])", "%1") -- remove excess spaces from punctuation
			phonetic = gsub(phonetic, "([‘(]) ", "%1")
			phonetic = gsub(phonetic, "[‘’]", "\"")
			if not manual_tr then
				phonetic = gsub(phonetic, "%'([^%'])", "%1") -- allow bolding for manual translit
				if variety_code == "nan" then
					phonetic = gsub(phonetic, " +%-%-", "--")
				end
			end

			-- capitalisation
			-- "^" marks the next letter for upper-casing; one is placed at the
			-- start of every sentence when the example has sentence punctuation
			if match(example, "[。?!]") then
				phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
			end
			phonetic = gsub(phonetic, "%^<b>", "<b>^")
			phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
			phonetic = gsub(phonetic, "%^", "")

			if variety_code == "wuu" then
				local wuu_pron = require("Module:wuu-pron")
				phonetic = "<span class=\"IPA\">[" .. wuu_pron.ipa_conv(phonetic) .. "]</span>"
			elseif variety_code == "cdo" then
				local cdo_pron = require("Module:cdo-pron")
				phonetic = "<i>" .. phonetic .. "</i> / " ..
					(not match(phonetic, "-[^ ]+-")
						and "<span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
						or "")
			else
				phonetic = "<i>" .. phonetic .. "</i>"
			end
			phonetic = "<span style=\"color:#404D52\">" .. phonetic .. "</span>"
		end
	end

	-- indentation, font and identity tags
	if original_length > 10 then
		-- long example: one definition-list line per component
		trad_text = "<dd>" .. zh_format_start .. trad_text .. zh_format_end
		if phonetic then
			phonetic = "<dl><dd>" .. phonetic
			translation = "<dd>" .. translation .. "</dd></dl>"
			tr_tag = tag_start .. variety_list[variety][3] .. tag_end .. "</dd>"
		else
			translation = "<dl><i>" .. translation .. "</i></dl>"
		end
		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end
		trad_tag = tag_start .. variety_list[variety][1] .. ", <i>[[w:Traditional Chinese|trad.]]" ..
			(simp_exist and "" or " and [[w:Simplified Chinese|simp.]]") .. "</i>" .. tag_end .. "</dd>"
		if simp_exist then
			simp_text = "<dd>" .. zh_format_start .. simp_text .. zh_format_end
			simp_tag = tag_start .. variety_list[variety][1] .. ", <i>[[w:Simplified Chinese|simp.]]</i>" .. tag_end .. "</dd>"
		end
		if reference then
			-- <small> now encloses <i>; the two tags used to be closed in the
			-- wrong order
			reference = "<dd><small><i>From:</i> " .. (ref_list[reference] and ref_list[reference][2] or reference) .. "</small></dd>"
		end
		return trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") ..
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation
	else
		-- short example: everything on one line, joined by dividers
		trad_text = zh_format_start .. trad_text .. zh_format_end
		divider = " ― "
		-- identity tags are shown only for varieties whose code is not "cmn"
		if variety_code ~= "cmn" then
			ts_tag = tag_start .. variety_list[variety][1] .. tag_end
			tr_tag = tag_start .. variety_list[variety][3] .. tag_end
		end
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		if simp_exist then
			simp_text = " / " .. zh_format_start .. simp_text .. zh_format_end
		end
		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end
		return trad_text .. (simp_text or "") .. (ts_tag or "") .. divider ..
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation
	end
end
return export