# Utilisateur:NaggoBot/args dedup.py

# -*- coding: utf-8 -*-
import pywikibot, mwparserfromhell
import difflib
import sys
import re

# key (the text before the first '=') without leading or trailing spaces

def stripKey(arg):
  """Return the key of a `key=value` argument, trimmed of surrounding whitespace.

  The argument is converted with str(); the key is everything that precedes
  the first '='. Returns None when the text contains no '=' at all.
  """
  # NOTE(review): any '=' in the text counts, including one that sits inside
  # a nested template or link of an otherwise unnamed argument - confirm
  # that this is acceptable for the pages being processed.
  name, separator, _ = str(arg).partition('=')
  if not separator:
    return None
  return name.strip()

# value without leading or trailing spaces

def stripValue(arg):
  """Return the value of a `key=value` argument, trimmed of surrounding whitespace.

  The value is everything that follows the first '=' in str(arg); later '='
  characters stay part of the value. Returns None when there is no '='.
  """
  text = str(arg)
  separatorAt = text.find('=')
  if separatorAt < 0:
    return None
  return text[separatorAt + 1:].strip()

# list of args used in the template

def getAllArgs(template):
  """Return the keys of all named arguments of `template`, in call order.

  An argument is considered named when its text contains '='. Arguments
  whose key is empty after stripping (for example `=value`) are left out.
  Keys that occur several times appear several times in the result.
  """
  namedParams = (param for param in template.params if '=' in str(param))
  keys = (stripKey(param) for param in namedParams)
  return [key for key in keys if key != ""]

# list of args used more than once

def getDuplicateArgs(template):
  """Return the sorted list of argument keys that `template` uses more than once.

  Each duplicated key is reported a single time, however often it occurs.
  """
  seen = set()
  repeated = set()
  for key in getAllArgs(template):
    if key in seen:
      repeated.add(key)
    else:
      seen.add(key)
  return sorted(repeated)

# list of unnamed args
def getUnnamedArgs(template):
  """Return the arguments of `template` whose text contains no '=', in call order.

  The original parameter objects are returned, not their string form.
  """
  return [param for param in template.params if '=' not in str(param)]
  
# first 500 chars of the template, with line breaks removed

def sampleTemplate(template):
  """Return a one-line excerpt of `template` for display.

  The text is cut to its first 500 characters first; carriage returns and
  line feeds are then removed, so the result may be shorter than 500.
  """
  excerpt = str(template)[:500]
  return "".join(char for char in excerpt if char not in "\r\n")

# rename the key of the nth (n=argToRename) occurrence of the duplicate argument call
  
def renameArgInTemplate(template, argToRename, dupArg):
  """Interactively rename one occurrence of a duplicated argument.

  template    -- template whose `params` list is modified in place
  argToRename -- 1-based rank of the occurrence of `dupArg` to rename
  dupArg      -- key of the duplicated argument

  The new key is read from standard input before the parameters are
  scanned. The chosen parameter is replaced by a plain string in which the
  first occurrence of `dupArg` has been substituted by the new key.
  """
  newName=input("New name for argument, change %s to :" % dupArg)

  occurrence = 0
  for position, param in enumerate(template.params):
    if stripKey(param) != dupArg:
      continue
    occurrence += 1
    if occurrence != argToRename:
      continue
    # Only whitespace can precede the key, so the first match of dupArg in
    # the parameter text is the key itself.
    template.params[position] = template.params[position].replace(dupArg, newName, 1)

# keep only the nth (n=valueToKeep) occurrence of the duplicate argument call

def keepOneValueInTemplate(template, valueToKeep, dupArg):
  """Delete every occurrence of a duplicated argument except one.

  template    -- template whose `params` list is modified in place
  valueToKeep -- 1-based rank of the occurrence of `dupArg` to keep
  dupArg      -- key of the duplicated argument

  If `valueToKeep` does not correspond to an existing occurrence, all
  occurrences of `dupArg` are removed.
  """
  occurrence = 0
  doomed = []
  for position, param in enumerate(template.params):
    if stripKey(param) == dupArg:
      occurrence += 1
      if occurrence != valueToKeep:
        doomed.append(position)
  # Delete by index, last first, so the remaining indices stay valid. This
  # avoids overwriting parameters with a marker string, which would also
  # remove a genuine parameter that happens to have the same text.
  for position in reversed(doomed):
    del template.params[position]

def templateOK(template):
  """Tell whether `template` may be fixed automatically (used in -auto mode).

  Returns False when the template name matches one of the excluded names,
  contains the text "Tableau Coupe", or contains any digit from 1 to 9;
  returns True otherwise.
  """
  excludedNames = (
    "Climat",
    "Foot classement",
    "Phase finale à 4",
    "Phase finale à 6",
    "Phase finale à 8",
    "Phase finale à 16",
    "Tournoi sur 3 tours",
    "Fstats total",
    "Fstats",
  )
  if any(template.name.matches(candidate) for candidate in excludedNames):
    return False
  if "Tableau Coupe" in template.name:
    return False
  # NOTE(review): only the digits 1-9 are tested, so a name whose sole digit
  # is 0 is accepted - confirm that this is intended.
  return not any(str(digit) in template.name for digit in range(1, 10))

# main function : deduplicate a single page
  
def _parseAction(action, count):
  """Decode a numeric answer to the interactive prompt.

  Returns ("keep", n) for an answer of the form "n" and ("rename", n) for
  "rn", where n is an integer with 1 <= n <= count. Returns None for any
  other text.
  """
  match = re.fullmatch(r"(r?)([0-9]+)", action)
  if match is None:
    return None
  number = int(match.group(2))
  if number < 1 or number > count:
    return None
  return ("rename" if match.group(1) else "keep", number)


def deduplicatePage(page):
  """Find and fix duplicated template arguments on a single page.

  The page is skipped unless it belongs to the global category `dupCateg`.
  Every template of the page is inspected; for each argument key used more
  than once, its values are listed and an action is chosen:

  * with `-auto` on the command line, the last occurrence is kept when the
    case is simple (template accepted by templateOK, key not ending in a
    digit 1-9, no two different non-empty values, and either every value is
    empty or the last occurrence is non-empty); otherwise the argument is
    skipped;
  * without `-auto`, the user is asked: "s" skips the argument, "n" leaves
    the page without saving anything, "<k>" keeps only occurrence k and
    "r<k>" renames occurrence k. Any other answer leaves the argument
    untouched.

  A context diff is printed, and the page is saved with the global edit
  summary `message` if its text changed. With `-null` on the command line
  and no duplicate found, a null edit is offered instead.
  """
  title=page.title()
  if dupCateg not in page.categories():
    print("%s not in category, skipping" % title)
    return
  text=page.get()
  print("= Page : ",title,' ',page.full_url())
  wikicode = mwparserfromhell.parser.Parser().parse(text, skip_style_tags=True)
  nDup=0
  for template in wikicode.filter_templates():
    duplicateArgs=getDuplicateArgs(template)
    unnamedArgs=getUnnamedArgs(template)
    if duplicateArgs:
      print("In template %s :" % sampleTemplate(template))
    # A key such as "2" addresses the same slot as the second unnamed
    # argument; this is only reported, never fixed automatically.
    for arg in template.params:
      key=stripKey(arg)
      if re.match("^[1-9][0-9]*$",str(key)):
        num=int(key)
        if len(unnamedArgs)>=num:
          print("Argument %s in template %s conflicts with unnamed argument %s - edit manually" % (arg, template.name, unnamedArgs[num-1]))
    for dupArg in duplicateArgs:
      print("* Argument %s has multiple occurrences :" % dupArg)
      values=[stripValue(arg) for arg in template.params if stripKey(arg) == dupArg]
      count=len(values)
      # (1-based rank, value) of every occurrence carrying a non-empty value
      nonEmpty=[(rank, value) for rank, value in enumerate(values, 1) if value not in ["", None]]
      hasNonEmptyValues=bool(nonEmpty)
      lastValue=nonEmpty[-1][0] if nonEmpty else 0
      hasDistinctValues=any(value != nonEmpty[0][1] for _, value in nonEmpty)
      nDup+=1
      for rank, value in enumerate(values, 1):
        print(" * Value %d : %s" % (rank, value))
      # automatically select an option in simple cases
      if '-auto' in sys.argv:
        endsWithDigit=dupArg[-1:] in [str(n) for n in range(1,10)]
        if templateOK(template) and not hasDistinctValues and not endsWithDigit and (lastValue==count or not hasNonEmptyValues):
          action=str(count)
        else:
          action="s"
      else:
        action=input("s=skip this argument, n=skip this article, 1-%d = keep this value only, r1-r%d = rename this argument :" % (count, count)).strip()
      if action=="s":
        continue
      if action=="n":
        # Leaves the page without saving, including edits already made to
        # earlier arguments of this page.
        return
      # The choice is validated as an integer: comparing the raw strings
      # would accept "25" when only 3 values exist and reject "10".
      decoded=_parseAction(action, count)
      if decoded is None:
        continue
      kind, number=decoded
      if kind=="keep":
        keepOneValueInTemplate(template, number, dupArg)
      else:
        renameArgInTemplate(template, number, dupArg)
  newText=str(wikicode)
  print("".join(difflib.context_diff([x + "\n" for x in text.split("\n")],[x + "\n" for x in newText.split("\n")], "before","after")))
  if newText.strip() != text.strip():
    page.put(newText,message, asynchronous=True)
  elif nDup==0 and "-null" in sys.argv:
    action=input("No duplicate found. Null edit ? y/n")
    if action=="y":
      page.put(text,message, asynchronous=True)

# deduplicate all pages in a category

def parseCateg(categoryName, fromPage=None, namespace=None):
  """Run deduplicatePage on the members of a category.

  categoryName -- title of the category page, looked up on the global `site`
  fromPage     -- when given, members whose title compares lower than this
                  string are skipped
  namespace    -- passed as `namespaces` to site.categorymembers
  """
  categoryPage = pywikibot.Page(site, categoryName)
  for member in site.categorymembers(categoryPage, namespaces=namespace):
    if fromPage is not None and member.title() < fromPage:
      continue
    deduplicatePage(member)
def _optionValue(flag, default=None):
  """Return the command-line token that follows `flag`.

  Returns `default` when `flag` is absent from sys.argv. Exits with an
  explanatory message when `flag` is the last token, instead of failing
  with an IndexError.
  """
  if flag not in sys.argv:
    return default
  position = sys.argv.index(flag) + 1
  if position >= len(sys.argv):
    sys.exit("Option %s requires a value" % flag)
  return sys.argv[position]


# Command-line driver. The target wiki defaults to French Wikipedia; use
# for example "-lang ja -project wiktionary" to work on another site.
namespace=_optionValue('-namespace')
lang=_optionValue('-lang', 'fr')
project=_optionValue('-project', 'wikipedia')
site=pywikibot.Site(lang, project)
# Find out the category: local name of namespace 14 followed by the text of
# the site's "Duplicate-args-category" message.
nsCateg=site.namespace(14)
nameCateg=site.mediawiki_message("Duplicate-args-category")
dupCategName=nsCateg+":"+nameCateg
print(dupCategName)
dupCateg=pywikibot.Page(site, dupCategName)
# Edit summary: -message wins, otherwise a link to the category.
if lang=='fr':
  defaultMessage="[[%s|Correction de modèles utilisant des arguments dupliqués]]" % dupCategName
else:
  defaultMessage="[[%s|%s]]" % (dupCategName, nameCateg)
message=_optionValue('-message', defaultMessage)
# -main overrides any -namespace value.
if '-main' in sys.argv:
  namespace='0'
if '-cat' in sys.argv or '-catdup' in sys.argv:
  # -catdup takes precedence over -cat when both are given.
  if '-catdup' in sys.argv:
    categoryName=dupCategName
  else:
    categoryName=_optionValue('-cat')
  fromPage=_optionValue('-frompageincat')
  if fromPage is not None:
    print("Parsing category %s from article %s" % (categoryName,fromPage))
  parseCateg(categoryName, fromPage, namespace=namespace)

if '-page' in sys.argv:
  pageName=_optionValue('-page')
  page=pywikibot.Page(site, pageName)
  deduplicatePage(page)

if '-linksto' in sys.argv:
  pageName=_optionValue('-linksto')
  page=pywikibot.Page(site, pageName)
  for ref in page.getReferences(namespaces=namespace):
    deduplicatePage(ref)