Преобразование кодировки MacCyrillic в именах файлов в IBM866 в файловой системе NTFS

После копирования файлов из системы MacOS в общий ресурс Windows через samba я получил такие имена файлов, как:

Сђ•вл
К†в†ЂЃ¶≠л• Ђ®бвл.pdf
П†бѓЃав.doc

И обычно они должны выглядеть так:

Сметы
Каталожные листы.pdf
Паспорт.doc

В некоторых случаях я также получал символ U+F028 в конце имени:

Новые

Есть ли способ автоматически определять и преобразовывать такие файлы на компьютере с Windows?

0
задан 4 October 2020 в 13:48
1 ответ

В итоге я написал собственный скрипт...
Код плохой и мало тестировался, но в моем случае работает.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
(Not) Simple MacCyrillic -> IBM866 converting script.
"""

import sys
# subprocess.run(), which this script uses to call find(1), only exists
# since Python 3.5, so anything older (not just Python 2) must be rejected.
# This check has to stay parseable by Python 2 so the message can be shown.
if sys.version_info < (3, 5):
    print("Please run it with Python 3.5 or better.")
    # Non-zero status: refusing to run is a failure, not a success.
    sys.exit(1)

import os
# The script shells out to find(1) and mv(1) and splits paths on "/".
if os.name != 'posix':
    print("This script works only on GNU/Linux.")
    sys.exit(1)

import subprocess

# Marker characters: a path is treated as garbled only if it contains at least
# one of these (see check_all_path_string / check_part_of_name).
# NOTE(review): '™' and 'І' are keys of replaceTable but are not listed here,
# so a name whose only non-letter garbage is one of those two is never
# detected - confirm whether that omission is intentional.
BadChars = ['©', '•', '≠', '£', '¢', '†', 'ѓ', 'Ѓ', 'Ђ', '§', '¶', 'ђ', '®', '°', 'Ґ']

# Maps each character as it appears in a garbled name to the lowercase
# Cyrillic letter that was intended (e.g. "П†бѓЃав" -> "Паспорт").
# Some keys are ordinary Cyrillic letters ('ж', 'г', ...), which is why the
# table is applied only to path components that contain a BadChars marker.
# NOTE(review): presumably equivalent to
# name.encode('mac_cyrillic').decode('cp866') for these characters - verify
# before replacing the table with the codec round-trip.
replaceTable = {
            '©': 'й', 
            'ж': 'ц',
            'г': 'у',
            '™': 'к',
            '•': 'е',
            '≠': 'н',
            '£': 'г',
            'и': 'ш',
            'й': 'щ',
            'І': 'з',
            'е': 'х',
            'к': 'ъ',
            'д': 'ф',
            'л': 'ы',
            # '¢' and 'Ґ' (last entry) both map to 'в'; presumably two
            # revisions of MacCyrillic differ at that code point - confirm.
            '¢': 'в',
            '†': 'а',
            'ѓ': 'п',
            'а': 'р',
            'Ѓ': 'о',
            'Ђ': 'л',
            '§': 'д',
            '¶': 'ж',
            'н': 'э',
            'п': 'я',
            'з': 'ч',
            'б': 'с',
            'ђ': 'м',
            '®': 'и',
            'в': 'т',
            'м': 'ь',
            '°': 'б',
            'о': 'ю',
            'Ґ': 'в'
            }

def check_all_path_string(path, BadChars):
    """Tell whether *path* contains at least one marker from *BadChars*.

    Every marker is looked up with a substring test against the whole path
    string, so a hit in any component of the path counts.

    Returns True on the first marker found, False if none occurs.
    """
    return any(marker in path for marker in BadChars)

def check_part_of_name(part_of_name, BadChars):
    """Tell whether a single path component holds a marker character.

    Walks the component character by character and reports True as soon as
    one of them is a member of *BadChars*; False when none is (including
    for an empty component).
    """
    return any(char in BadChars for char in part_of_name)

def replace_symbols(part_of_name, replaceTable):
    """Return *part_of_name* with every mapped character substituted.

    Each character that is a key of *replaceTable* is replaced by the
    corresponding value; all other characters are copied unchanged.  The
    substitution is a single pass over the input, so a replacement that is
    itself a key of the table is not translated a second time.

    A direct dict lookup per character and one final join replace the
    previous per-character ``list(keys())`` scan and repeated string
    concatenation; the result is the same.
    """
    return "".join(replaceTable.get(sym, sym) for sym in part_of_name)
            
def check_part_of_bad_path(bad_file_name_list, BadChars, replaceTable):
    """Rebuild a path from its components, repairing the garbled ones.

    *bad_file_name_list* is a path already split on "/".  A component that
    contains at least one *BadChars* marker is passed through
    replace_symbols(); every other component is kept verbatim, because the
    table also remaps ordinary Cyrillic letters and would corrupt a
    component that is already correct.

    The components are joined back with "/", which mirrors ``split("/")``
    exactly: an absolute path stays absolute and a relative path stays
    relative.  (Prefixing every component with "/" used to turn relative
    paths into absolute ones, and the follow-up "//" clean-up also rewrote
    separators that were part of the caller's path.)

    Returns the rebuilt path string; an empty list yields "".
    """
    fixed_parts = [
        replace_symbols(part, replaceTable)
        if check_part_of_name(part, BadChars)
        else part
        for part in bad_file_name_list
    ]
    return "/".join(fixed_parts)

def main_validation(files, BadChars, replaceTable):
    """Produce the corrected counterpart of every path in *files*.

    The result has one entry per input path, in the same order.  A path
    with no *BadChars* marker anywhere in it is passed through untouched;
    any other path is split on "/" and rebuilt component by component via
    check_part_of_bad_path().
    """
    def corrected(path):
        # Fast exit for paths that need no work at all.
        if not check_all_path_string(path, BadChars):
            return path
        return check_part_of_bad_path(path.split("/"), BadChars, replaceTable)

    return [corrected(path) for path in files]

def grab_files(folder, find_type):
    """List every entry of the given type below *folder* using find(1).

    *find_type* is handed to ``find -type`` unchanged ('d' for directories,
    'f' for regular files).  Returns the paths as a list of str, in the
    order find reports them.

    Entries are requested NUL-terminated (``-print0``) and split on NUL.
    Splitting the decoded output with str.splitlines() would also break on
    form feed, NEL, U+2028 and similar characters, cutting any file name
    that contains one of them into several bogus paths.

    Raises UnicodeDecodeError if a name is not valid UTF-8.  The exit
    status of find is not checked, so unreadable subtrees are skipped
    silently.
    """
    result = subprocess.run(
        ["find", folder, "-type", find_type, "-print0"],
        stdout=subprocess.PIPE,
    )
    listing = result.stdout.decode('utf-8')
    # The output ends with a NUL, so drop the empty trailing piece.
    return [path for path in listing.split("\0") if path]

def _plan_renames(files, validated):
    """Pair each path with its in-place corrected name, deepest paths first.

    *files* and *validated* are parallel lists (same length, same order).
    Only the last component of each path is renamed, inside its current
    parent directory, and the result is ordered from the deepest path to
    the shallowest.  That way every source path still exists when its turn
    comes: a parent directory is renamed only after everything below it.

    Returns a list of (source, destination) tuples; paths whose last
    component is already correct are left out.
    """
    renames = []
    for original, fixed in zip(files, validated):
        parent, old_name = os.path.split(original)
        new_name = os.path.basename(fixed)
        if new_name != old_name:
            renames.append((original, os.path.join(parent, new_name)))
    # Stable sort: entries of equal depth keep the order find reported.
    renames.sort(key=lambda pair: pair[0].count("/"), reverse=True)
    return renames


if __name__ == "__main__":
    folder = os.getcwd()
    find_type = "d" # 'd' for directories or 'f' for files
    print("Grab files from", folder)
    files = grab_files(folder, find_type)
    print("Starting validation...")
    validated = main_validation(files, BadChars, replaceTable)
    print("Computing diff... (this can take a long time)")
    # Pair sources and targets positionally.  Filtering the two lists
    # independently and matching them by index could shift the pairs (and
    # rename the wrong entries) whenever a corrected path already existed
    # among the sources.
    renames = _plan_renames(files, validated)

    print("Overall source count:", len(files))
    print("Validated diff:", len(renames))

    for source, dest in renames:
        print(source, '->', dest)

    print("\nProceed?")
    choice = input("[Y]es | [N]o > ")
    if choice in ('y', 'Y'):
        for source, dest in renames:
            if os.path.isdir(dest):
                # mv would silently move the source *into* that directory
                # instead of renaming it.
                print("Skipping", source, "- target directory exists:", dest)
                continue
            # Argument list instead of a shell string: names containing
            # quotes, '$' or backticks are passed to mv untouched.
            subprocess.run(["mv", "-i", "--", source, dest])
    elif choice in ('n', 'N'):
        print("Sure, it's okay. Thanks for playing!")
        sys.exit()
    else:
        print("Sorry, I don't understand you.")
        print("Assuming as negative, exiting...")
        sys.exit()
    
0
ответ дан 8 October 2020 в 17:34

Теги

Похожие вопросы