# Bikarhêner:Balyozxane/rastker.py

import pywikibot
import re
from pywikibot import pagegenerators
from pywikibot.bot import AutomaticTWSummaryBot, ConfigParserBot, SingleSiteBot
import re

# "#:" items holding an example-request template; rewritten to "#*" items.
_EXAMPLE_REQUEST_RE = re.compile(r'#:\s*{{\s*(bêmînak|mînak\?)(.*?)}}')

# Section names (optionally followed by a number) that switch tagging on.
_ALLOWED_SECTIONS_RE = re.compile(r'(Mane|Navdêr|Lêker|Rengdêr|Hoker|Serenav|Baneşan|Bazinedaçek|Biwêj|Cînav|Daçek|Girêdek|Gotineke pêşiyan|Hevok|Paşdaçek|Pêşdaçek)\s*(\d+)?')

# A line containing any of these is copied through and never tagged.
_SKIP_LINE_RES = (
    re.compile(r'{{guha?e?rto'),
    re.compile(r'{{bi?ne?ê?re?2?3?'),
    re.compile(r"''Bin[êe]re''"),
    re.compile(r"{{kontrol"),
    re.compile(r"{{rastnivîs"),
    re.compile(r"{{şaşnivîs"),
    re.compile(r"{{(nk|navkes)\|"),
    re.compile(r"{{(ku-Arab|ckb-Latn)\|"),
    re.compile(r"{{(awayekî din|ad)\|"),
)

# Prefixes of list items that already count as an example under a definition.
_EXAMPLE_ITEM_PREFIXES = ('#:', '#*')


def _lacks_example(lines, index):
    """Return True if ``lines[index]`` is a ``#`` item not followed by an example item."""
    line = lines[index]
    if not line.startswith('#') or line.startswith(_EXAMPLE_ITEM_PREFIXES):
        return False
    # The last line of the section has no follower and therefore no example.
    following = lines[index + 1] if index + 1 < len(lines) else ''
    return not following.startswith(_EXAMPLE_ITEM_PREFIXES)


def process_section(page_title: str, lang_code: str, section: str) -> str:
    """Insert ``#* {{bêmînak|<lang_code>}}`` below definitions that have no example.

    Steps, in order:

    1. Every ``#: {{bêmînak...}}`` / ``#: {{mînak?...}}`` item in *section* is
       rewritten to ``#* {{bêmînak|<lang_code>}}``.
    2. The text is split into lines and every line is stripped of leading and
       trailing whitespace.
    3. Inside an allowed section, each line starting with ``#`` (but not
       ``#:`` or ``#*``) whose next line is not a ``#:``/``#*`` item gets the
       placeholder line appended after it.

    A line starting with ``==`` ends the current allowed section; a line that
    contains one of the allowed section names starts one (taking effect from
    the following line). Lines matching a skip pattern are copied through
    without being tagged and without affecting that state.

    :param page_title: title of the page being processed (currently unused).
    :param lang_code: language code written into the placeholder template.
    :param section: wikitext of one language section.
    :return: the processed text with trailing newlines removed.
    """
    placeholder = f'#* {{{{bêmînak|{lang_code}}}}}'

    # A callable replacement inserts the placeholder literally; a template
    # string would have backslashes in lang_code interpreted by re.sub.
    section = _EXAMPLE_REQUEST_RE.sub(lambda _match: placeholder, section)

    lines = [raw_line.strip() for raw_line in section.split('\n')]
    output = []
    within_allowed_section = False

    for index, line in enumerate(lines):
        if any(pattern.search(line) for pattern in _SKIP_LINE_RES):
            output.append(line)
            continue

        # Any heading closes the section we were in; it may reopen one below.
        if line.startswith('=='):
            within_allowed_section = False

        output.append(line)
        if within_allowed_section and _lacks_example(lines, index):
            output.append(placeholder)

        # NOTE(review): this matches a section name anywhere in the line, not
        # only in headings - confirm whether non-heading lines should count.
        if _ALLOWED_SECTIONS_RE.search(line):
            within_allowed_section = True

    return '\n'.join(output).rstrip('\n')

def extract_lang_sections(page_text):
    """Map each ``{{ziman|<code>}}`` level-2 heading to the text that follows it.

    The value for a code is everything between the end of its heading and the
    start of the next such heading (or the end of *page_text*). Text before
    the first heading is ignored; if a code occurs more than once, the body
    of its last occurrence is kept.
    """
    heading_re = re.compile(r'==[ ]*?{{ziman\|([^}]+)}}[ ]*?==')
    headings = list(heading_re.finditer(page_text))

    # Each body ends where the next heading begins; the last one runs to EOF.
    body_ends = [following.start() for following in headings[1:]]
    body_ends.append(len(page_text))

    return {
        heading.group(1): page_text[heading.end():body_end]
        for heading, body_end in zip(headings, body_ends)
    }

class AppendTextBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that adds ``{{bêmînak}}`` placeholders to definitions without examples.

    Each page is split into its ``{{ziman|<code>}}`` language sections, every
    section is run through ``process_section`` and the page is saved when at
    least one section actually changed.
    """

    # Class-level configuration read by the pywikibot base classes.
    summary_key = 'basic-changing'
    use_redirects = False
    update_options = {
        'summary': None,
        'text': '',
        'top': False,
    }

    @staticmethod
    def _normalise_whitespace(section: str) -> str:
        """Strip every line and drop trailing newlines.

        This is the whitespace normalisation ``process_section`` applies to
        its input, so comparing against it isolates real content changes from
        pure whitespace differences.
        """
        return '\n'.join(line.strip() for line in section.split('\n')).rstrip('\n')

    @staticmethod
    def _replace_section_body(text: str, lang_code: str, new_body: str) -> str:
        """Return *text* with the body of the *lang_code* section replaced.

        The section starts after the first line equal to
        ``== {{ziman|<lang_code>}} ==`` and ends before the next line starting
        with ``== {{ziman|``. If no such heading line exists, *text* is
        returned unchanged.
        """
        lines = text.split('\n')
        # NOTE(review): only the spaced heading form is recognised here, while
        # extract_lang_sections also accepts headings without spaces - confirm.
        heading = f'== {{{{ziman|{lang_code}}}}} =='
        heading_index = next(
            (i for i, line in enumerate(lines) if line.strip() == heading),
            None,
        )
        if heading_index is None:
            return text

        # The body begins on the line directly after the heading. Scanning
        # from there (not one line later) keeps an immediately following
        # language heading from being swallowed.
        body_start = heading_index + 1
        body_end = body_start
        while (body_end < len(lines)
               and not lines[body_end].strip().startswith('== {{ziman|')):
            body_end += 1

        # Heading, one blank line, the new body, one blank line, the rest.
        # The old body (including its first line) is dropped entirely so that
        # nothing is duplicated.
        rebuilt = lines[:body_start] + ['', new_body.strip() + '\n'] + lines[body_end:]
        return '\n'.join(rebuilt).strip()

    def treat_page(self) -> None:
        """Tag the current page's definitions and save it if anything changed."""
        page = self.current_page
        summary = ('+{{[[Şablon:bêmînak|bêmînak]]}} '
                   '(bi [[Taybet:PermanentLink/5895631|rastker.py]])')
        text = page.text

        if "{{maneid|" in text:
            pywikibot.output(
                f"Skipping page {page.title()} because it contains '{{{{maneid|'")
            return

        # Put a blank line between a level-2 heading ending in " ==" and a
        # "===" heading that follows it directly.
        text = re.sub(r' ==\n===', ' ==\n\n===', text)

        # Collect only sections whose content changed; whitespace-only
        # differences do not count as a change.
        changed_sections = {}
        for lang_code, section in extract_lang_sections(text).items():
            processed = process_section(page.title(), lang_code, section)
            if processed != self._normalise_whitespace(section):
                changed_sections[lang_code] = processed

        if not changed_sections:
            pywikibot.output("No meaningful changes detected. Skipping save.")
            return

        for lang_code, processed in changed_sections.items():
            text = self._replace_section_body(text, lang_code, processed)

        self.put_current(text, summary=summary)

def main(*args: str) -> None:
    """Parse command-line arguments and run ``AppendTextBot``.

    Global and page-generator arguments are consumed by pywikibot; of the
    remaining ones, ``-summary:<value>`` and ``-text:<value>`` are stored with
    their value (prompting for it when it is empty) and any other argument is
    passed to the bot as a boolean flag.

    :param args: command-line arguments.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)

    options = {'text': ''}

    for arg in local_args:
        name, _, value = arg.partition(':')
        # Arguments arrive as "-name[:value]"; bot option keys have no dash.
        option = name.lstrip('-')
        if option in ('summary', 'text'):
            if not value:
                # Keep the answer; otherwise the option would stay empty.
                value = pywikibot.input(f'Please enter a value for {option}')
            options[option] = value
        else:
            options[option] = True

    gen = gen_factory.getCombinedGenerator(preload=True)

    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        bot = AppendTextBot(generator=gen, **options)
        bot.run()

# Run the bot only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()