// à copier/coller dans le module à partir d'ici
/*
* Module AWB de corrections de typographie et de syntaxe wiki :
* - nettoyage des bandeaux ;
* - nettoyage des catégories ;
* - nettoyage des images ;
* - nettoyage des modèles ;
* - nettoyage des liens internes ;
* - nettoyage clé de tri DEFAULTSORT ;
* - ajout clé de tri DEFAULTSORT si personnalité ;
* - ajout du modèle {{langue|ja|...}} sur les kanjis ;
* - ajout séparateur {{,}} sur références ;
* - typographie des pourcentages ;
* - typographie des nombres ({{formatnum:}}) ;
* - typographie et wikification des siècles ;
* - typographie et wikification des nièmes.
*
* Auteur : [[:fr:User:TiChou]]
* Date création : juillet 2007
* Date révision : 18 août 2007
*/
// Banner names already kept for the article being processed. ProcessArticle re-allocates it
// on every call and NettoyageBandeau searches/fills it to drop duplicate banners.
string[] BandeauxUniques;
// Number of entries of BandeauxUniques currently in use (index of the next free slot).
int nbBU;
// Module entry point: applies a fixed sequence of regex clean-ups to one article and returns
// the rewritten wikitext.
//
// Parameters:
//   ArticleText   - wikitext of the article; every step works on this value.
//   ArticleTitle  - article title, used only to derive a sort key when no category supplies one.
//   wikiNamespace - not used by this method.
//   Summary       - receives a comma-separated list of the steps that changed the text.
//   Skip          - always set to false.
//
// The order of the steps matters: later patterns rely on the normalised forms produced by
// earlier ones (e.g. "{{Multi bandeau", the localised category prefix, "{{formatnum:").
// After each group of steps the text is compared with the previous snapshot to decide
// whether the corresponding label is appended to Summary.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;
    Summary = "";
    string ArticleTextOld = ArticleText;
    // Reset the per-article duplicate tracker used by NettoyageBandeau.
    BandeauxUniques = new string[100];
    nbBU = 0;

    // ---- Banners -------------------------------------------------------------------------
    // Rewrite the various multi-banner template spellings to {{Multi bandeau}}.
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{[ _]*(?:(?::?[ _]*fr[ _]*:[ _]*)?(?:Modèle|Template)[ _]*:[ _]*)?(?:Multi[ _]+bandeaux?|Multi-?bandeau|Multib)[ _]*(\|[^\}]*)?\}\}",
        "{{Multi bandeau$1}}",
        RegexOptions.IgnoreCase);
    // Inside a {{Multi bandeau}}, remove nested multi-banner names and empty parameters.
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\{\{Multi bandeau(?:\|[^\|\}]*)*?)\| *(?:(?::? *fr *: *)?(?:Modèle|Template) *: *)?(?:Multi[ _]+bandeaux?|Multi-?bandeau|Multib|) *(?=(?:\|[^\|\}]*)*\}\})",
        "",
        RegexOptions.IgnoreCase);
    // Clean up (multi-)banners and drop duplicates (see NettoyageBandeau).
    ArticleText = Regex.Replace(ArticleText,
        @"(\{\{(?:Multi bandeau\|)?)((?<=\{\{Multi bandeau\|)[^\}]+|(?<=\{\{) *(?:(?::? *fr *: *)?(?:Modèle|Template) *: *)?(?:Ébauche(?:s?|[ _]+[^\|\}]+)|Portail[ _]+[^\|\}]+))(\}\}(?s:\s*))",
        new MatchEvaluator(NettoyageBandeau),
        RegexOptions.IgnoreCase);
    // Group consecutive stub ("Ébauche") banners into a single {{Multi bandeau}}.
    ArticleText = Regex.Replace(ArticleText,
        @"(?:\{\{(?:Multi bandeau\|)?((?<=\{\{Multi bandeau\|)(?:[^\}]+\|)?Ébauche[s ][^\}]+|Ébauche(?:s?| [^\|\}]+))\}\}(\s*)){2,}",
        new MatchEvaluator(MultiBandeau),
        RegexOptions.Singleline);
    // Group consecutive portal ("Portail") banners into a single {{Multi bandeau}}.
    ArticleText = Regex.Replace(ArticleText,
        @"(?:\{\{(?:Multi bandeau\|)?((?<=\{\{Multi bandeau\|)(?:[^\}]+\|)?Portail [^\}]+|Portail [^\|\}]+)\}\}(\s*)){2,}",
        new MatchEvaluator(MultiBandeau),
        RegexOptions.Singleline);
    // Remove {{Multi bandeau}} wrappers that are empty or hold a single banner.
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{Multi bandeau\|?\}\}\s*|(?<=\{\{)Multi bandeau\|(?=[^\|\}]+\}\})",
        "");
    // Normalise the blank line between the banners and the categories.
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\{\{(?:Multi bandeau\|[^\}]+|Portail [^\|\}]+)\}\})\s*(?=(?:\{\{[ _]*DEFAULTSORT[ _]*:[^\}]+\}\}\s*)?\[\[ *Catégorie *:[^\]]+\]\].*)",
        "\r\n\r\n",
        RegexOptions.Singleline);
    if (ArticleText != ArticleTextOld)
    {
        Summary += "net. bandeaux, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Categories ----------------------------------------------------------------------
    // Normalise the category prefix (optional leading colon is kept via $1, optional
    // language prefix is dropped, "Category" and the local name both become Namespaces[14]).
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[)[ _]*(?:(:)[ _]*)?" +
        @"(?:" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
        @"(?:Category|" + Variables.Namespaces[14].Replace(":", "") + @")[ _]*:[ _]*" +
        @"(?=[^\]]+\]\])",
        "$1" + Variables.Namespaces[14], RegexOptions.IgnoreCase);
    // Upper-case the first letter of category names.
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[" +
        Variables.Namespaces[14] +
        @")\p{Ll}",
        new MatchEvaluator(UpFirstChar));
    if (ArticleText != ArticleTextOld)
    {
        Summary += "net. catégories, ";
        ArticleTextOld = ArticleText;
    }

    // ---- DEFAULTSORT ---------------------------------------------------------------------
    Match match;
    string key;
    // No DEFAULTSORT yet and the article carries a person-related category: add one.
    if (!Regex.IsMatch(ArticleText, @"\{\{[ _]*DEFAULTSORT[ _]*[:\|][^\}]*\}\}") &&
        Regex.IsMatch(ArticleText, @"\[\[" + Variables.Namespaces[14] + @"(?:Naissance|Décès|Pseudonyme|Personnalité)"))
    {
        // Prefer a sort key already given on a category; otherwise derive one from the title.
        match = Regex.Match(ArticleText, @"\[\[Catégorie:[^\|\]]+\|((?:[^,\]]+(?=,)|[^\]]{2,})(?:, *[^\]]+)?)\]\]");
        if (!match.Success)
        {
            key = CleanKey(Tools.MakeHumanCatKey(ArticleTitle));
        }
        else
        {
            key = CleanKey(match.Groups[1].Value);
        }
        // Insert the DEFAULTSORT just before the first category. With Singleline the ".*"
        // consumes the rest of the text, so only one insertion happens.
        ArticleText = Regex.Replace(ArticleText,
            @"(\[\[Catégorie:[^\]]+\]\].*)",
            "{{DEFAULTSORT:" + key + "}}\n$1",
            RegexOptions.Singleline);
        // Remove per-category sort keys of two characters or more.
        ArticleText = Regex.Replace(ArticleText,
            @"(?<=\[\[Catégorie:[^\|\]]+)\|[^\]]{2,}(?=\]\])",
            "");
        if (ArticleText != ArticleTextOld)
        {
            Summary += "ajout defaultsort, ";
            ArticleTextOld = ArticleText;
        }
    }
    // Otherwise clean up the existing DEFAULTSORT, if any.
    else
    {
        // Normalise the magic word spelling and clean the key (see Key / CleanKey).
        ArticleText = Regex.Replace(ArticleText,
            @"(?<=\{\{)[ _]*DEFAULTSORT[ _]*[:\|][ _]*([^\}]*?)[ _]*(?=\}\})",
            new MatchEvaluator(Key),
            RegexOptions.IgnoreCase);
        // Exactly one line break between DEFAULTSORT and the following category.
        ArticleText = Regex.Replace(ArticleText,
            @"(?<=\{\{DEFAULTSORT:[^\}]*\}\})\s*(?=\[\[" +
            Variables.Namespaces[14] +
            @")",
            "\r\n",
            RegexOptions.Singleline);
        if (ArticleText != ArticleTextOld)
        {
            Summary += "net. defaultsort, ";
            ArticleTextOld = ArticleText;
        }
    }

    // ---- Images --------------------------------------------------------------------------
    // Normalise the image prefix to Namespaces[6]. The English alias handled here is
    // "Image" (the previous "Category" alias was a copy/paste slip from the category step:
    // it left [[Image:...]] untouched and could turn a leftover "[[Category:" prefix into
    // the image namespace).
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[)[ _]*(?:(:)[ _]*)?" +
        @"(?:" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
        @"(?:Image|" + Variables.Namespaces[6].Replace(":", "") + @")[ _]*:[ _]*",
        "$1" + Variables.Namespaces[6], RegexOptions.IgnoreCase);
    // Upper-case the first letter of image names.
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[" +
        Variables.Namespaces[6] +
        @")\p{Ll}",
        new MatchEvaluator(UpFirstChar));
    if (ArticleText != ArticleTextOld)
    {
        Summary += "net. images, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Templates -----------------------------------------------------------------------
    // Strip the redundant language / template-namespace prefix inside {{...}}.
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\{\{)[ _]*" +
        @"(?::?[ _]*" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
        @"(?:Template|" + Variables.Namespaces[10].Replace(":", "") + @")[ _]*:[ _]*" +
        @"(?=[^\}]+\}\})",
        "",
        RegexOptions.IgnoreCase);
    if (ArticleText != ArticleTextOld)
    {
        Summary += "net. modèles, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Internal links ------------------------------------------------------------------
    // Strip leading blanks and the redundant local language prefix inside [[...]].
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[)[ _]*" +
        @"(?::?[ _]*" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
        @"(?=[^\]]+\]\])",
        "",
        RegexOptions.IgnoreCase);
    if (ArticleText != ArticleTextOld)
    {
        Summary += "net. liens, ";
        ArticleTextOld = ArticleText;
    }

    // ---- {{langue|ja|...}} ---------------------------------------------------------------
    // Only for articles mentioning "Japon" that are not flagged as Chinese-related.
    if (!Regex.IsMatch(ArticleText, @"\{\{Unicode ?chinois\}\}|\{\{(?:Multi bandeau\s*\|[^\}]*)?Portail Chine(?:\|[^\}]*)?\}\}", RegexOptions.IgnoreCase) &&
        Regex.IsMatch(ArticleText, @"Japon", RegexOptions.IgnoreCase))
    {
        // Wrap runs of kana/kanji in {{langue|ja|...}}. The negative look-behinds exclude text
        // inside <nowiki>, <pre>, <math>, HTML comments, <timeline>, link targets, categories,
        // template names, DEFAULTSORT, existing language templates and external link URLs.
        ArticleText = Regex.Replace(ArticleText,
            @"(?<!<nowiki>(?s:.(?<!</nowiki>))*)(?<!<pre>(?s:.(?<!</pre>))*)(?<!<math>(?s:.(?<!</math>))*)(?<!<!--(?s:.(?<!-->))*)(?<!<timeline>(?s:.(?<!</timeline>))*)(?<!\[\[(?:.(?<!\]\]|\|))*)(?<!\[\[(?:Catégorie|Category)\s*:(?:.(?<!\]\]))*)(?<!\{\{(?:.(?<!\}\}|\|))*)(?<!\{\{DEFAULTSORT\s*:(?:.(?<!\}\}))*)(?<!\{\{(?:langue|lang|Langue avec nom|Japonais|Nihongo|nji|Nom japonais inversé)\s*\|(?:.(?<!\}\}))*)(?<!\[(?:http|https|ftp|mailto):(?:.(?<!\]\]|\s))*)(([\p{IsKatakanaPhoneticExtensions}\p{IsEnclosedCJKLettersandMonths}\p{IsHiragana}\p{IsKatakana}\p{IsCJKUnifiedIdeographs}\p{IsCJKSymbolsandPunctuation}]+\s*)+(?<!\s))",
            "{{langue|ja|$1}}",
            RegexOptions.IgnoreCase);
        if (ArticleText != ArticleTextOld)
        {
            Summary += "ajout {{langue|ja|...}}, ";
            ArticleTextOld = ArticleText;
        }
    }

    // ---- Separator between references ----------------------------------------------------
    // </ref> [,] <ref  ->  </ref>{{,}}<ref
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=<\/ref>)\s*,?\s*(?=<ref)",
        "{{,}}",
        RegexOptions.IgnoreCase);
    if (ArticleText != ArticleTextOld)
    {
        Summary += "ajout sép. <ref>, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Whitespace normalisation --------------------------------------------------------
    // NOTE(review): pattern and replacement below are each a single whitespace-like character
    // and look identical as displayed; confirm which characters are intended (e.g. a
    // non-breaking space versus a plain space or an entity). Kept exactly as found.
    ArticleText = Regex.Replace(ArticleText,
        @" ",
        " ");

    // ---- Percentages ---------------------------------------------------------------------
    // Integer percentages: 12% -> 12 % (skipped inside style/width/height attributes, URLs
    // and when the digits are the fractional part of a number).
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]+:\s*|(?:width|height)\s*=\s*""?|https?://\S*|\d+[,\.])(\d+)(?:\s*|&(?:nb|thin)sp;)%",
        "$1 %",
        RegexOptions.IgnoreCase);
    // Decimal percentages: 12.5% -> 12,5 %
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]+:\s*|(?:width|height)\s*=\s*""?|https?://\S*)(\d+)[,\.](\d+)(?:\s*|&(?:nb|thin)sp;)%",
        "$1,$2 %",
        RegexOptions.IgnoreCase);
    if (ArticleText != ArticleTextOld)
    {
        Summary += "typo %, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Numbers -------------------------------------------------------------------------
    // The look-behinds shared by these patterns skip style attributes, URLs, link targets,
    // categories and template names.
    // 1234.56 -> {{formatnum:1234.56}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{3,}\.\d+",
        "{{formatnum:$&}}");
    // 12.3456 -> 12,3456 (one or two integer digits, four or more decimals: only the
    // decimal point is turned into a comma)
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\b(?<!\{\{Coor (?:.(?<!\}\}))*|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d?)\.(?=\d{4,})",
        ",",
        RegexOptions.IgnoreCase);
    // 1234,56 -> {{formatnum:1234.56}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{3,},\d{1,2}",
        new MatchEvaluator(formatnum2));
    // 1234,5678 -> {{formatnum:1234.5678}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{3,},\d{4,}",
        new MatchEvaluator(formatnum2));
    // 1 234.56 -> {{formatnum:1234.56}}
    // 1 234 -> {{formatnum:1234}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*)[1-9]\d{0,2}[ ](?:\d{3}[ ])*\d{3}(?:\.\d+)?(?![, ]?\d)",
        new MatchEvaluator(formatnum1));
    // Same as above with &nbsp; / &thinsp; as the thousands separator.
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2}&(?:nb|thin)sp;(?:\d{3}&(?:nb|thin)sp;)*\d{3}(?:\.\d+)?(?!(?:,|&(?:nb|thin)sp;)?\d)",
        new MatchEvaluator(formatnum1));
    // 1,234.56 -> {{formatnum:1234.56}}
    // 1,234 -> {{formatnum:1234}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2},(?:\d{3},)*\d{3}(?:\.\d+)?(?=\D)",
        new MatchEvaluator(formatnum1));
    // 1 234,56 -> {{formatnum:1234.56}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*)[1-9]\d{0,2}[ ](?:\d{3}[ ])*\d{3}(?:,\d+)?(?![\. ]?\d)",
        new MatchEvaluator(formatnum2));
    // Same as above with &nbsp; / &thinsp; as the thousands separator.
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2}&(?:nb|thin)sp;(?:\d{3}&(?:nb|thin)sp;)*\d{3}(?:,\d+)?(?!(?:\.|&(?:nb|thin)sp;)?\d)",
        new MatchEvaluator(formatnum2));
    // 1.234,56 -> {{formatnum:1234.56}}
    // 1.234 -> {{formatnum:1234}}
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2}\.(?:\d{3}\.)*\d{3}(?:,\d+)?(?=\D)",
        new MatchEvaluator(formatnum2));
    // 12.3 -> 12,3
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\b(?<!\{\{Coor (?:.(?<!\}\}))*|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:[1-9]\d+|\d))\.(?=\d+)",
        ",",
        RegexOptions.IgnoreCase);
    // 12345 -> {{formatnum:12345}} (also skipped inside a few templates, ISBN/ISSN numbers,
    // id=/oldid= parameters, after '#', and after a digit followed by '.' or ',')
    ArticleText = Regex.Replace(ArticleText,
        @"\b(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])[1-9]\d{4,}",
        "{{formatnum:$&}}",
        RegexOptions.IgnoreCase);
    // Four-digit numbers are only wrapped when they fall in the ranges matched by the three
    // patterns below (3000-9999, 2100-2999, 2020-2099); presumably to leave years alone.
    // 3456 -> {{formatnum:3456}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])\b[3-9]\d{3}\b",
        "{{formatnum:$&}}",
        RegexOptions.IgnoreCase);
    // 2233 -> {{formatnum:2233}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])\b2[1-9]\d{2}\b",
        "{{formatnum:$&}}",
        RegexOptions.IgnoreCase);
    // 2023 -> {{formatnum:2023}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])\b20[2-9]\d\b",
        "{{formatnum:$&}}",
        RegexOptions.IgnoreCase);
    // - {{formatnum:1234}} -> {{formatnum:-1234}}
    // NOTE(review): the leading \b requires a word character right before the sign, so a
    // sign preceded by a space or at the start of a line is not matched; confirm intent.
    ArticleText = Regex.Replace(ArticleText,
        @"\b([-\+])(?:[ ]|&(?:nb|thin)sp;)*\{\{formatnum:([\d\.]+)\}\}",
        "{{formatnum:$1$2}}");
    if (ArticleText != ArticleTextOld)
    {
        Summary += "typo nombre, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Century links -------------------------------------------------------------------
    // {{XVIIe siècle|something}} -> {{XVIIe siècle}}
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{[ _]*([XVI]+er?)[ _]+siècle[ _]*( av\. J\.-C\.)?[ _]*\|[^\}]*\}\}",
        "{{$1 siècle$2}}");
    // [[XVIIe siècle]] -> {{XVIIe siècle}}
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[[ _]*([XVI]+er?)[ _]+siècle[ _]*\]\]",
        "{{$1 siècle}}");
    // [[XVIIe siècle|XVII{{e}} siècle]] -> {{XVIIe siècle}}
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[[ _]*([XVI]+er?)[ _]+siècle[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) *\]\]",
        "{{$1 siècle}}");
    // [[IIe siècle av. J.-C.]] -> {{IIe siècle av. J.-C.}}
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[[ _]*([CLXVI]+er?)[ _]+siècle[ _]+av\.[ _]+J\.-C\.[ _]*\]\]",
        "{{$1 siècle av. J.-C.}}");
    // [[IIe siècle av. J.-C.|II{{e}} siècle av. J.-C.]] -> {{IIe siècle av. J.-C.}}
    // (the label part expects the word "siècle"/"s." once; it used to be required twice in a
    // row, which made this rule unable to match its own example)
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[[ _]*([XVI]+er?)[ _]+siècle[ _]+av\.[ _]+J\.-C\.[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist|\.)? *\]\]",
        "{{$1 siècle av. J.-C.}}");
    // [[Au XVIIe siècle et après]] -> [[Au XVIIe siècle et après|Au {{s-|XVII|e}} et après]]
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[(?!Catégorie:)):?([^\|\]]*)\b([XVI]+)(er?) +siècle\b([^\|]*?)(?=\]\])",
        "$&|$1{{s-|$2|$3}}$4");
    // [[XVIIe siècle|XVII{{e}}]] -> {{s mini|XVII|e}}
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[[ _]*([XVI]+)(er?)[ _]+siècle[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? *\]\]",
        "{{s mini|$1|$2}}");
    // [[IIe siècle av. J.-C.|II{{e}}]] -> {{-s mini|II|e}}
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[[ _]*([XVI]+)(er?)[ _]+siècle[ _]+av\.[ _]+J\.-C\.[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? *\]\]",
        "{{-s mini|$1|$2}}");
    if (ArticleText != ArticleTextOld)
    {
        Summary += "wikif lien siècle, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Plain-text centuries ------------------------------------------------------------
    // The shared look-behind skips link targets, categories and template names.
    // Ier siècle av. J.-C. -> {{-s-|I|er}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\bI|\b1|\{\{I\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°]|er)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
        "{{-s-|I|er}}");
    // 3e siècle av. J.-C. -> {{-s-|III|e}} (the Arabic number is converted by a substituted
    // wiki template when the page is saved)
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)\b([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
        "{{-s-|{{subst:Nombre en romain|$1|subst=subst:}}|e}}");
    // VIe siècle av. J.-C. -> {{-s-|VI|e}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\b([CLXVI]+)|\{\{([CLXVI]+)\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
        "{{-s-|$1$2|e}}");
    // {{s-|III|e}} av. J.-C. -> {{-s-|III|e}}
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{s(-)?\|([CLXVI]+\|er?)\}\}s? +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
        "{{-s$1|$2}}");
    // Ier siècle -> {{s-|I|er}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\bI|\b1|\{\{I\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°]|er)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle\b|\.)",
        "{{s-|I|er}}");
    // 16{{e}} et 17{{e}} siècles -> {{sp-|XVI|e|et|XVII|e|s}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)\b([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)?\s*(et(?: le| du)?|au|-|/|ou)\s*([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cles?\b|\.)",
        "{{sp-|{{subst:Nombre en romain|$1|subst=subst:}}|e|$2|{{subst:Nombre en romain|$3|subst=subst:}}|e|s}}",
        RegexOptions.IgnoreCase);
    // 3e siècle -> {{s-|III|e}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)\b([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle\b|\.)",
        "{{s-|{{subst:Nombre en romain|$1|subst=subst:}}|e}}");
    // VIe siècle -> {{s-|VI|e}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\b([XVI]+)|\{\{([XVI]+)\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle\b|\.)",
        "{{s-|$1$2|e}}");
    // VIe -> {{s mini-|VI|e}} (not when followed by Reich, République, arrondissement,
    // congrès or édition)
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\b([XVI]+)|\{\{([XVI]+)\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)?(?!(?:\}\}|</sup>)?\s+(?:Reich|République|arrondissement|[cC]ongrès|[éÉ]dition))",
        "{{s mini-|$1$2|e}}");
    // Ier -> {{s mini-|I|er}} (not after the listed first names, nor before Reich,
    // arrondissement or congrès)
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<!(?:François|Henri|Napoléon|Louis|Gabriel|Arthur|Ferdinand|Nicolas|Georges?|Alexandre|Charles|Frédéric|Richard)\s+)\bI *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°|er\b)(?:\}\}|</sup>)?(?!(?:\}\}|</sup>)?\s+(?:Reich|arrondissement|[cC]ongrès))",
        "{{s mini-|I|er}}");
    // {{-s mini-|VI|e}} et {{-s-|VII|e}} -> {{-sp-|VI|e|et|VII|e|s}}
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{(?:-?s mini(-)?\|([XVI]+)\|(er?)|([XVI]+)(er?))\}\}\s*(et|au|-|/)\s*\{\{-s(?: mini)?-?\|([XVI]+\|er?)\}\}(?:\s+s(?:i[éeè]cles?|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)|s)?",
        "{{-sp$1|$2$4|$3$5|$6|$7|s}}",
        RegexOptions.IgnoreCase);
    // {{-s mini|III|e}} et {{Ier siècle av. J.-C.}} -> {{-sp|III|e|et|I|er|s}}
    // {{VIe s}} et {{IIe siècle av. J.-C.}}s -> {{-sp|VI|e|et|II|e|s}}
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{(?:-?s[ _]+mini\|([XVI]+)\|(er?)|([XVI]+)(er?) s(?:iècle)?)\}\}\s+(et|au|-|/)\s+\{\{([XVI]+)(er?)[ _]+siècle[ _]+[aA]v\.[ _]+J\.-C\.\}\}",
        "{{-sp|$1$3|$2$4|$5|$6|$7|s}}",
        RegexOptions.IgnoreCase);
    // {{s mini-|VI|e}} et {{s-|VII|e}} -> {{sp-|VI|e|et|VII|e|s}}
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{(?:s mini(-)?\|([XVI]+)\|(er?)|([XVI]+)(er?))\}\}\s*(et|au|-|/)\s*\{\{s(?: mini)?-?\|([XVI]+\|er?)\}\}(?:\s+si[éeè]cles?|s)?",
        "{{sp$1|$2$4|$3$5|$6|$7|s}}",
        RegexOptions.IgnoreCase);
    // {{s mini|XII|e}} et {{XIIIe siècle}} -> {{sp|XII|e|et|XIII|e|s}}
    // {{XVIe s}} et {{XVIIe siècle}}s -> {{sp|XVI|e|et|XVII|e|s}}
    ArticleText = Regex.Replace(ArticleText,
        @"\{\{(?:s[ _]+mini\|([XVI]+)\|(er?)|([XVI]+)(er?) s(?:iècle)?)\}\}\s+(et|au|-|/)\s+\{\{([XVI]+)(er?)[ _]+siècle\}\}s?",
        "{{sp|$1$3|$2$4|$5|$6|$7|s}}",
        RegexOptions.IgnoreCase);
    if (ArticleText != ArticleTextOld)
    {
        Summary += "wikif siècle, ";
        ArticleTextOld = ArticleText;
    }

    // ---- Ordinals ------------------------------------------------------------------------
    // [[Machin IIe truc]] -> [[Machin IIe truc|Machin II{{e}} truc]]
    ArticleText = Regex.Replace(ArticleText,
        @"(?<=\[\[(?!Catégorie:)):?([^\|\]]*\b(?:[XVI]+|\d+))(er?|re|nd)\b([^\|]*?)(?=\]\])",
        "$&|$1{{$2}}$3");
    // 1re -> 1{{re}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b1) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?[éeè]?re(s)?\b(?:\}\}|</sup>)?",
        "{{re$1}}");
    // 1er -> 1{{er}} (a degree sign followed by C/F or by arc minutes is left alone)
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b1) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°(?! *[CF]\b| *\d+['′])|er)(?:\}\}|</sup>)?",
        "{{er}}");
    // 2nd -> 2{{nd}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b2) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:nd|n?d(es?))(?:\}\}|</sup>)?",
        "{{nd$1}}");
    // 3e -> 3{{e}}
    ArticleText = Regex.Replace(ArticleText,
        @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b\d+) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°(?! *[CF]\b| *\d+['′]))(?:\}\}|</sup>)?",
        "{{e}}");
    if (ArticleText != ArticleTextOld)
    {
        Summary += "wikif nième, ";
        ArticleTextOld = ArticleText;
    }

    // Drop the trailing ", " left by the last appended label.
    Summary = Summary.Trim(", ".ToCharArray());
    return ArticleText;
}
// fonctions bandeaux
// MatchEvaluator used by the banner clean-up step of ProcessArticle.
// Group 1 is the opening "{{" (optionally followed by "Multi bandeau|"), group 2 the
// '|'-separated banner names and group 3 the closing "}}" plus trailing whitespace.
// Each name is trimmed, has underscores and repeated spaces collapsed, loses any
// "fr:"/"Modèle:"/"Template:" prefix and gets an upper-case first letter. Stub ("Ébauche")
// and portal ("Portail") banners already recorded in BandeauxUniques are dropped; every
// other name is kept as is. Returns the rebuilt template, or an empty string when no name
// is left (which removes the template and the whitespace after it).
private string NettoyageBandeau(Match m)
{
    string bandeaux = "";
    foreach (string b in m.Groups[2].Value.Split(new Char[] { '|' }))
    {
        string bandeau = b.Trim();
        bandeau = Regex.Replace(bandeau, "_", " ");
        bandeau = Regex.Replace(bandeau, " +", " ");
        bandeau = Regex.Replace(bandeau, @"^(?:(?::? *fr *: *)?(?:Modèle|Template) *: *)?", "");
        bandeau = Tools.TurnFirstToUpper(bandeau);
        if (Regex.IsMatch(bandeau, @"^(?:Ébauche(?:s?| .+)|Portail .+)$", RegexOptions.IgnoreCase))
        {
            if (Array.IndexOf(BandeauxUniques, bandeau) != -1)
            {
                // Already emitted earlier in this article: drop the duplicate.
                continue;
            }
            // The tracker is allocated with a fixed size; grow it instead of overrunning it
            // when an article carries more distinct banners than there are slots.
            if (nbBU >= BandeauxUniques.Length)
            {
                Array.Resize(ref BandeauxUniques, BandeauxUniques.Length * 2 + 1);
            }
            BandeauxUniques[nbBU] = bandeau;
            nbBU++;
        }
        bandeaux += bandeau + "|";
    }
    if (bandeaux == String.Empty)
    {
        return String.Empty;
    }
    // Trim removes the trailing separator (and any leading one left by an empty first name).
    return m.Groups[1].Value + bandeaux.Trim(new Char[] { '|' }) + m.Groups[3].Value;
}
// MatchEvaluator that merges a run of consecutive banners into one {{Multi bandeau}}.
// Every capture of group 1 is a banner name (or an existing '|'-separated list of names);
// group 2 captures the whitespace after each banner, and only the last of those is kept
// after the merged template.
private string MultiBandeau(Match m)
{
    CaptureCollection names = m.Groups[1].Captures;
    CaptureCollection spacings = m.Groups[2].Captures;
    string merged = "{{Multi bandeau";
    for (int i = 0; i < names.Count; i++)
    {
        merged = merged + "|" + names[i].Value;
    }
    Capture trailing = spacings[spacings.Count - 1];
    // Concatenating the Capture appends its text (Capture.ToString() returns the value).
    return merged + "}}" + trailing;
}
// fonctions catégories et defaultsort
// MatchEvaluator returning the matched text in upper case. The callers match a single
// lower-case letter right after a namespace prefix, so this capitalises the first letter of
// a category or image name. The invariant culture is used so that the result does not
// depend on the machine's regional settings (e.g. a Turkish locale would turn "i" into "İ").
private string UpFirstChar(Match m)
{
    return m.Value.ToUpperInvariant();
}
// Normalises a sort key: underscores become spaces, runs of spaces collapse to one, and
// accented letters, ligatures and non-ASCII digits are replaced by plain ASCII equivalents.
// Returns the cleaned key. key must not be null.
private string CleanKey(string key)
{
    key = Regex.Replace(key.Replace("_"," "), @" +", " ");
    // One-to-one table: the first character of each entry is the replacement for every
    // other character of that entry.
    string[] tr1 = new string[] { "0⅛¼⅜⅝⅞½¾٠۰", "1١۱", "2٢۲", "3٣۳", "4٤۴", "5٥۵", "6٦۶", "7٧۷", "8٨۸", "9٩۹", "aáàâãäåăąā", "AÁÀÂÃÄÅĂĄĀ", "cçćĉčċ", "CÇĆĈČĊ", "dðďđ", "DÐĎĐ", "eéèêëěėęē", "EÉÈÊËĚĖĘĒ", "gğĝġģ", "GĞĜĠĢ", "hĥħ", "HĤĦ", "iíìîïıĩįī", "IÍÌÎÏİĨĮĪ", "jĵ", "JĴ", "kĸķ", "KĶ", "lĺľłļŀ", "LĹĽŁĻĿ", "nñʼnńňņ", "NÑŃŇŅ", "oóòôõöøőō", "OÓÒÔÕÖØŐŌ", "rŕřŗ", "RŔŘŖ", "sśŝšş", "SŚŜŠŞ", "tťŧţ", "TŤŦŢ", "uúùûüŭůűũųū", "UÚÙÛÜŬŮŰŨŲŪ", "wŵ", "WŴ", "yýÿŷ", "YÝŶŸ", "zźžż", "ZŹŽŻ" };
    // One-to-two table: the first two characters of each entry replace the ligature that
    // follows them. The "ij" ligature is written with escapes: spelling it with the plain
    // letters "ij" would make every 'i' and 'j' of the key expand to "ij".
    string[] tr2 = new string[] { "aeæ", "AEÆ", "oeœ", "OEŒ", "ssß", "ij\u0133", "IJ\u0132", "ngŋ", "NGŊ" };
    foreach (string t in tr1)
    {
        for (int i = 1; i < t.Length; i++)
        {
            key = key.Replace(t[i], t[0]);
        }
    }
    foreach (string t in tr2)
    {
        string ascii = t.Substring(0, 2);
        for (int i = 2; i < t.Length; i++)
        {
            key = key.Replace(t.Substring(i, 1), ascii);
        }
    }
    return key;
}
// MatchEvaluator for the DEFAULTSORT clean-up: rebuilds the inside of the template as
// "DEFAULTSORT:" followed by the cleaned sort key taken from group 1.
private string Key(Match m)
{
    string rawKey = m.Groups[1].Value;
    string cleanedKey = CleanKey(rawKey);
    return string.Concat("DEFAULTSORT:", cleanedKey);
}
// MatchEvaluator for numbers written with a decimal comma: removes the grouping separators
// listed in the first pattern (dot, the whitespace character of the class, &nbsp;/&thinsp;
// entities), turns the comma into a decimal point and wraps the result in {{formatnum:}}.
private string formatnum2(Match m)
{
    string ungrouped = Regex.Replace(m.Value, @"[\. ]|&(?:nb|thin)sp;", "");
    string dotted = Regex.Replace(ungrouped, @",", ".");
    return string.Concat("{{formatnum:", dotted, "}}");
}
// MatchEvaluator for numbers written with a decimal point: removes the grouping separators
// listed in the pattern (comma, the whitespace character of the class, &nbsp;/&thinsp;
// entities) and wraps the result in {{formatnum:}}.
private string formatnum1(Match m)
{
    string ungrouped = Regex.Replace(m.Value, @"[, ]|&(?:nb|thin)sp;", "");
    return string.Concat("{{formatnum:", ungrouped, "}}");
}
// à copier/coller dans le module jusqu'ici