| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- import chardet
def detect_file_encoding(file_path):
    """Guess the character encoding of a file using chardet.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        Whatever ``chardet.detect`` returns for the file's raw bytes; callers
        in this file read the guessed codec name from its ``'encoding'`` key.
    """
    # Detection works on raw bytes, so open in binary mode and load the
    # whole file before handing it to the detector.
    with open(file_path, 'rb') as handle:
        payload = handle.read()
    return chardet.detect(payload)
def read_file_with_encoding(file_path, encoding=None):
    """Read a text file, detecting its encoding first when none is given.

    Args:
        file_path: Path of the file to read.
        encoding: Name of the codec to decode with (optional). When falsy,
            the encoding reported by ``detect_file_encoding`` is used.

    Returns:
        The decoded file content as ``str``, or ``None`` when the file cannot
        be decoded with the chosen encoding (invalid byte sequence, or a
        codec name that Python does not recognise).
    """
    if not encoding:
        detected = detect_file_encoding(file_path)
        # NOTE: if the detector reports no encoding (None), open() below
        # falls back to the platform's default text encoding.
        encoding = detected['encoding']
        print(f"检测到的编码: {detected}")

    try:
        with open(file_path, 'r', encoding=encoding) as file:
            return file.read()
    except (UnicodeDecodeError, LookupError) as e:
        # UnicodeDecodeError: the bytes are not valid in this encoding.
        # LookupError: open() has no codec under this name, which can happen
        # when a detector reports an encoding Python does not ship.
        print(f"使用 {encoding} 解码失败: {e}")
        return None
# Script section: detect the encoding of a log file, print its content,
# and save a UTF-8 copy next to it.
file_path = '/Users/maxfeng/Downloads/log.txt'
output_path = '/Users/maxfeng/Downloads/log2.txt'

# Detect the encoding first.
result = detect_file_encoding(file_path)
print(f"文件编码检测结果: {result}")
code = result['encoding']

# Then read the file using the detected encoding.
content = read_file_with_encoding(file_path, code)
print("\n文件内容:")
print(content)

# To bypass detection, pass an explicit codec name (e.g. 'GB2312') as the
# second argument of read_file_with_encoding instead of `code`.

# read_file_with_encoding returns None when decoding fails. Check before
# opening the output so a failed read does not truncate/create the target
# file and then crash on write(None); exit non-zero to signal the failure.
if content is None:
    raise SystemExit(1)

with open(output_path, 'w', encoding='utf-8') as f:
    f.write(content)
|