import re

# Runs of tabs, non-breaking spaces, double quotes and backticks.
_NOISE_RE = re.compile(r'[\t\xa0"`]+')
# Runs of two or more asterisks (e.g. Markdown bold markers).
_ASTERISKS_RE = re.compile(r'\*{2,}')
_WHITESPACE_RE = re.compile(r'\s+')

# A login keyword and its value, followed later by a password keyword and its
# value. The gap between them is LAZY on purpose: clean_text() collapses all
# newlines, so a greedy gap would run to the end of the text and pair the
# first login with the last password.
_PAIR_RE = re.compile(
    r'(?:логин|login)[\s:]*(\S+)[^\n\r]*?(?:пароль|password)[\s:]*(\S+)',
    re.IGNORECASE,
)
_LOGIN_RE = re.compile(r'(?:логин|login)[\s:]*(\S+)', re.IGNORECASE)
_PASSWORD_RE = re.compile(r'(?:пароль|password)[\s:]*(\S+)', re.IGNORECASE)


def clean_text(text: str) -> str:
    """Normalize *text* into a single whitespace-collapsed line.

    Tabs, non-breaking spaces, double quotes and backticks are replaced with
    a space, runs of two or more asterisks are removed, and every run of
    whitespace (including newlines) is collapsed to one space. Leading and
    trailing whitespace is stripped.
    """
    text = _NOISE_RE.sub(' ', text)
    text = _ASTERISKS_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()


def _is_plausible(login: str, password: str) -> bool:
    """Return True if both values fall inside the accepted length bounds."""
    return 3 <= len(login) <= 50 and 4 <= len(password) <= 300


def parse_post(text: str) -> list:
    """Extract ``(login, password)`` pairs from *text*.

    The text is first normalized with :func:`clean_text`. Pairs are then
    found by matching a login keyword (``логин``/``login``) followed later by
    a password keyword (``пароль``/``password``), case-insensitively; each
    login is paired with the nearest password keyword that follows it.

    If that yields nothing, a fallback collects all login values and all
    password values independently and pairs them by position, but only when
    both lists have the same length.

    Only pairs with a login of 3-50 characters and a password of 4-300
    characters are kept.

    Returns:
        A list of ``(login, password)`` tuples, possibly empty.
    """
    cleaned = clean_text(text)

    # Captured values are runs of non-whitespace, so no stripping is needed.
    results = [
        (login, password)
        for login, password in _PAIR_RE.findall(cleaned)
        if _is_plausible(login, password)
    ]
    if results:
        return results

    # Fallback: handles e.g. the password appearing before the login.
    logins = _LOGIN_RE.findall(cleaned)
    passwords = _PASSWORD_RE.findall(cleaned)
    if len(logins) != len(passwords):
        return []
    return [
        (login, password)
        for login, password in zip(logins, passwords)
        if _is_plausible(login, password)
    ]