read_log.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import chardet
  2. def detect_file_encoding(file_path):
  3. """
  4. 检测文件编码
  5. Args:
  6. file_path: 文件路径
  7. Returns:
  8. 检测到的编码类型和置信度
  9. """
  10. # 读取文件的二进制内容
  11. with open(file_path, 'rb') as file:
  12. raw_data = file.read()
  13. # 使用chardet检测编码
  14. result = chardet.detect(raw_data)
  15. return result
  16. def read_file_with_encoding(file_path, encoding=None):
  17. """
  18. 使用指定编码读取文件,如果没有指定编码则先检测编码
  19. Args:
  20. file_path: 文件路径
  21. encoding: 指定的编码类型(可选)
  22. Returns:
  23. 文件内容
  24. """
  25. if not encoding:
  26. detected = detect_file_encoding(file_path)
  27. encoding = detected['encoding']
  28. print(f"检测到的编码: {detected}")
  29. try:
  30. with open(file_path, 'r', encoding=encoding) as file:
  31. content = file.read()
  32. return content
  33. except UnicodeDecodeError as e:
  34. print(f"使用 {encoding} 解码失败: {e}")
  35. return None
  36. file_path = '/Users/maxfeng/Downloads/log.txt'
  37. # 先检测编码
  38. result = detect_file_encoding(file_path)
  39. print(f"文件编码检测结果: {result}")
  40. code = result['encoding']
  41. # 然后用检测到的编码读取文件
  42. content = read_file_with_encoding(file_path, code)
  43. print("\n文件内容:")
  44. print(content)
  45. # with open('/Users/maxfeng/Downloads/log.txt', 'r', encoding='GB2312') as f:
  46. # content = f.read()
  47. with open('/Users/maxfeng/Downloads/log2.txt', 'w', encoding='utf-8') as f:
  48. f.write(content)