Suppose a folder contains many Word documents written in Traditional Chinese, and you want to batch-convert them to Simplified Chinese:
OpenCC libraries can be used. OpenCC (Open Chinese Convert) is an open-source Chinese conversion library that aims to provide high-quality conversion capabilities. It supports multiple programming language interfaces, including C++, Python, Java, and JavaScript, making it easy for developers from different backgrounds to integrate into their applications.
Enter the following prompt in ChatGPT:
Write a Python script to complete the task of converting traditional Chinese to simplified Chinese by following the following steps:
Open the folder: "F:\aivideo";
Use the win32com library to read all the DOCX documents in it;
Use the OpenCC library to convert the Traditional Chinese characters in the DOCX documents to Simplified Chinese;
Only convert the characters, and keep the original layout unchanged, including the pictures, tables, and formatting in the original Word document.
Note: Each step has to output information to the screen
To ensure that the images, tables, and formatting in your document remain intact during the conversion process, you need to ensure that only the text content is modified and nothing else is affected.
In a Python environment, OpenCC can be installed using the pip command:
pip install opencc
Source code:
import os
from win32com import client
from opencc import OpenCC
def convert_text_traditional_to_simplified(text, opencc):
    """Convert text from Traditional Chinese to Simplified Chinese using OpenCC.

    Args:
        text: The string to convert.
        opencc: A converter object exposing ``convert(text)``; the rest of
            this script passes ``OpenCC('t2s')``.

    Returns:
        Whatever ``opencc.convert(text)`` returns for *text*.
    """
    return opencc.convert(text)
# WdUnits.wdCharacter from the Word object model, spelled out so the script
# does not depend on win32com's generated constants table.
_WD_CHARACTER = 1

# Range.Text ends with one of these marks: "\r\x07" closes a table cell,
# "\r" closes an ordinary paragraph. The longer mark must be tested first.
_TRAILING_MARKS = ("\r\x07", "\r")


def _strip_trailing_mark(text):
    """Return *text* without its trailing paragraph or end-of-cell mark."""
    for mark in _TRAILING_MARKS:
        if text.endswith(mark):
            return text[:-len(mark)]
    return text


def _convert_paragraph(paragraph, opencc):
    """Convert the text of one Word paragraph in place, sparing its mark."""
    text = _strip_trailing_mark(paragraph.Range.Text or "")
    converted = convert_text_traditional_to_simplified(text, opencc)
    if converted == text:
        return

    # Character-by-character replacement is only safe when the converted
    # text lines up one-to-one with Word's own character collection (the
    # extra 1 accounts for the paragraph / end-of-cell mark).
    aligned = (
        len(converted) == len(text)
        and paragraph.Range.Characters.Count == len(text) + 1
    )
    if aligned:
        # Touch only the characters that actually changed so that mixed
        # formatting, inline pictures and fields in the paragraph are left
        # alone. The range is re-fetched from the paragraph on every write
        # so an earlier write cannot leave us holding a stale range.
        # NOTE(review): relies on Word giving the replacement the formatting
        # of the character it replaces - confirm on a sample document.
        for position, (old, new) in enumerate(zip(text, converted), start=1):
            if old != new:
                paragraph.Range.Characters(position).Text = new
    else:
        # Fallback: replace the paragraph text as a whole, but exclude the
        # trailing mark so it is not overwritten.
        body = paragraph.Range
        body.MoveEnd(_WD_CHARACTER, -1)
        body.Text = converted


def process_word_document(doc_path, output_path):
    """Convert one Word document to Simplified Chinese and save a copy.

    Opens *doc_path* in a hidden Word instance, converts the text of every
    paragraph in ``doc.Paragraphs`` and in the cells of ``doc.Tables`` with
    OpenCC's ``t2s`` configuration, then saves the result to *output_path*.
    Only those two collections are visited.

    Args:
        doc_path: Path of the document to read.
        output_path: Path the converted document is saved to.

    Raises:
        Any error raised by Word/COM or OpenCC propagates to the caller; the
        document and the Word application are closed first.
    """
    # Initialize OpenCC for traditional to simplified conversion
    opencc = OpenCC('t2s')

    print(f"正在读取文件: {doc_path}")
    # Word resolves relative paths against its own working directory rather
    # than this process's, so hand it absolute paths.
    source = os.path.abspath(doc_path)
    target = os.path.abspath(output_path)

    # Start Word application
    word = client.Dispatch("Word.Application")
    try:
        word.Visible = False
        doc = word.Documents.Open(source)
        try:
            # Iterate through paragraphs and convert text
            for paragraph in doc.Paragraphs:
                _convert_paragraph(paragraph, opencc)

            # Iterate through tables and convert text. Cells are reached via
            # table.Range.Cells because table.Rows raises on tables with
            # vertically merged cells. Text already converted by the pass
            # above compares equal and is skipped without being rewritten.
            for table in doc.Tables:
                for cell in table.Range.Cells:
                    for paragraph in cell.Range.Paragraphs:
                        _convert_paragraph(paragraph, opencc)

            # Save the converted document
            print(f"Saving converted file to: {output_path}")
            doc.SaveAs(target)
        finally:
            # False: do not save on close; the converted copy is written by
            # the SaveAs call above.
            doc.Close(False)
    finally:
        # Always shut Word down, otherwise a hidden WINWORD process lingers
        # after a failure.
        word.Quit()
def process_folder(folder_path):
    """Convert every .docx document directly inside *folder_path*.

    Each ``name.docx`` is converted by ``process_word_document`` and saved
    next to the original as ``converted_name.docx``. Progress messages are
    printed for every step.

    Skipped entries:
        * Word lock files (``~$...``), which are not real documents.
        * Files already named ``converted_...``, so that running the script
          twice does not produce ``converted_converted_...`` copies.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        None. Returns early, after printing a message, when the folder is
        missing or contains nothing to convert.
    """
    # isdir rather than exists: a path that points at a file is not a
    # folder we can list.
    if not os.path.isdir(folder_path):
        print(f"文件夹不存在: {folder_path}")
        return

    # Case-insensitive extension match so "REPORT.DOCX" is picked up too.
    docx_files = sorted(
        name
        for name in os.listdir(folder_path)
        if name.lower().endswith('.docx')
        and not name.startswith(('~$', 'converted_'))
    )
    if not docx_files:
        print("No .docx file found in folder")
        return

    for docx_file in docx_files:
        docx_path = os.path.join(folder_path, docx_file)
        output_path = os.path.join(folder_path, f"converted_{docx_file}")
        # Convert the docx file
        process_word_document(docx_path, output_path)
        print(f"文件转换完成: {output_path}")
# Specify the folder path. A raw string keeps the backslash literal: in a
# normal string "\a" is the BEL control character, which would corrupt the
# path.
folder_path = r"F:\aivideo"
# Process the folder
process_folder(folder_path)
The converted Word document: