|
|
@@ -0,0 +1,670 @@
|
|
|
+# 交易配对分析工具
|
|
|
+# 用于从交易记录CSV文件中为开仓/平仓交易进行配对,为每对关联交易分配相同ID
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+from datetime import datetime
|
|
|
+import re
|
|
|
+import os
|
|
|
+import warnings
|
|
|
+warnings.filterwarnings('ignore')
|
|
|
+
|
|
|
+
|
|
|
def _get_current_directory():
    """
    Return the directory that contains this source file.

    When ``__file__`` is not defined (for example in an interactive
    session), the current working directory is returned instead.

    Returns:
        str: Absolute directory path.
    """
    try:
        script_path = os.path.abspath(__file__)
    except NameError:
        # No __file__ available: fall back to the working directory.
        return os.getcwd()
    return os.path.dirname(script_path)
|
|
|
+
|
|
|
+
|
|
|
def read_transaction_csv(csv_path):
    """
    Read a transaction CSV file, trying several text encodings in turn.

    Strict UTF-8 is attempted before the GB* family: UTF-8 bytes often
    also decode under GBK without raising (producing mojibake column
    names), whereas GBK-encoded Chinese text almost never forms valid
    UTF-8. ``latin1`` accepts any byte sequence and therefore stays last
    as the final fallback.

    Args:
        csv_path (str): Path of the CSV file.

    Returns:
        pandas.DataFrame: The parsed records, decoded with the first
        encoding that succeeds. An empty DataFrame is returned only if
        every encoding fails with a decode error.

    Raises:
        Exception: Whatever ``pd.read_csv`` raises for a non-decoding
        problem (missing file, empty file, parse error, ...) once the
        last encoding has also been tried.
    """
    encodings = ['utf-8-sig', 'utf-8', 'gbk', 'gb2312', 'gb18030', 'latin1']
    last_encoding = encodings[-1]

    for encoding in encodings:
        # Only the read itself sits in the try block, so a failure in the
        # progress messages can never be mistaken for a decoding problem.
        try:
            df = pd.read_csv(csv_path, encoding=encoding)
        except UnicodeDecodeError:
            # Wrong encoding: move on to the next candidate.
            continue
        except Exception as e:
            # Other failures are retried with the remaining encodings and
            # only surfaced once no candidate is left.
            if encoding == last_encoding:
                print(f"读取CSV文件时出错: {str(e)}")
                raise
            continue

        print(f"成功使用 {encoding} 编码读取CSV文件")
        print(f"从CSV文件中读取到 {len(df)} 条记录")
        return df

    print(f"无法使用任何编码格式读取CSV文件: {csv_path}")
    return pd.DataFrame()
|
|
|
+
|
|
|
+
|
|
|
def parse_transaction_data(df):
    """
    Derive the normalized fields used for pairing from raw trade records.

    The input must provide the columns ``标的``, ``交易类型``, ``成交数量``,
    ``日期`` and ``委托时间``. The following columns are added:

    * ``标的_完整``     - stripped instrument text
    * ``交易类型_标准`` - stripped trade-type text
    * ``仓位操作``      - ``开仓`` / ``平仓`` / ``未知`` (from ``开`` / ``平``
      appearing in the trade type)
    * ``方向``          - ``多`` / ``空`` / ``未知``
    * ``成交数量_数值`` - absolute numeric quantity
    * ``交易时间``      - timestamp built from ``日期`` + ``委托时间``

    Rows with a non-positive quantity, unknown operation, unknown
    direction or an unparseable timestamp are dropped.

    Args:
        df (pandas.DataFrame): Raw trade records.

    Returns:
        pandas.DataFrame: A filtered copy with the derived columns; the
        input frame is not modified.
    """
    df = df.copy()

    # Full instrument text is kept because it is the matching key.
    df['标的_完整'] = df['标的'].astype(str).str.strip()
    df['交易类型_标准'] = df['交易类型'].astype(str).str.strip()

    def classify_operation(trade_type):
        """Map a trade-type string to open / close / unknown."""
        if '开' in trade_type:
            return '开仓'
        if '平' in trade_type:
            return '平仓'
        return '未知'

    def classify_direction(trade_type):
        """Map a trade-type string to long / short / unknown."""
        if '多' in trade_type:
            return '多'
        if '空' in trade_type:
            return '空'
        return '未知'

    df['仓位操作'] = df['交易类型_标准'].apply(classify_operation)
    df['方向'] = df['交易类型_标准'].apply(classify_direction)

    # First number in the text. The first alternative accepts properly
    # grouped thousands separators ("1,200") so that such a value is not
    # truncated at the comma; everything else falls back to a plain number.
    quantity_pattern = re.compile(
        r'-?(?:\d{1,3}(?:,\d{3})+(?!\d)|\d+)(?:\.\d+)?'
    )

    def extract_quantity(raw_qty):
        """Return the absolute numeric quantity, or 0.0 if none is found."""
        match = quantity_pattern.search(str(raw_qty).strip())
        if match is None:
            return 0.0
        return abs(float(match.group(0).replace(',', '')))

    df['成交数量_数值'] = df['成交数量'].apply(extract_quantity)

    def parse_datetime(row):
        """Combine the date and order-time cells into one timestamp."""
        datetime_str = f"{str(row['日期']).strip()} {str(row['委托时间']).strip()}"
        try:
            return pd.to_datetime(datetime_str)
        except Exception:
            # Best effort: unparseable rows are marked NaT and filtered
            # out below instead of aborting the whole run.
            return pd.NaT

    if len(df) > 0:
        df['交易时间'] = df.apply(parse_datetime, axis=1)
    else:
        # A row-wise apply on an empty frame does not yield a Series, so
        # create the empty timestamp column explicitly.
        df['交易时间'] = pd.Series(pd.NaT, index=df.index)

    # Drop records that cannot take part in pairing.
    valid = (
        (df['成交数量_数值'] > 0)
        & (df['仓位操作'] != '未知')
        & (df['方向'] != '未知')
        & df['交易时间'].notna()
    )
    df = df[valid].copy()

    print(f"解析后有效记录: {len(df)} 条")

    return df
|
|
|
+
|
|
|
+
|
|
|
def _greedy_exact_match(df, candidate_indices, target_qty):
    """
    Greedily pick candidate trades whose quantities add up to target_qty.

    Candidates are visited in the given order; a candidate is taken when
    its quantity does not exceed what is still needed and skipped
    otherwise. The search stops as soon as nothing is left to fill.

    Args:
        df (pandas.DataFrame): Frame holding the ``成交数量_数值`` column.
        candidate_indices: Index labels of the candidate rows, in the
            order in which they should be tried.
        target_qty (float): Quantity that has to be matched.

    Returns:
        tuple: ``(picked_indices, remaining_qty)``.
    """
    remaining_qty = target_qty
    picked = []
    for idx in candidate_indices:
        if remaining_qty <= 0:
            break
        qty = df.loc[idx, '成交数量_数值']
        if qty <= remaining_qty:
            picked.append(idx)
            remaining_qty -= qty
    return picked, remaining_qty


def _is_exact_match(picked, remaining_qty):
    """Return True when at least one trade was picked and nothing is left over."""
    return len(picked) > 0 and abs(remaining_qty) < 0.01


def fix_incomplete_pairs(df):
    """
    Repair incomplete pairs, i.e. pair IDs that occur on exactly one row.

    For every such row, still-unpaired trades (ID ``''`` or ``未配对``) of
    the same instrument and direction are searched for the opposite
    operation: closes at or after an open (earliest first), or opens at
    or before a close (latest first). If a greedy selection adds up to the
    row's quantity exactly, the selected trades receive the same pair ID.
    The scan is repeated until nothing changes, at most 10 times.

    Args:
        df (pandas.DataFrame): Trades with ``交易对ID``, ``标的_完整``,
            ``方向``, ``仓位操作``, ``交易时间`` and ``成交数量_数值``
            columns. Row labels are used with ``.loc`` and are expected
            to be unique.

    Returns:
        pandas.DataFrame: A repaired copy; the input is not modified.
    """
    df = df.copy()
    max_iterations = 10  # hard cap so the repair loop always terminates
    unpaired_labels = ['', '未配对']

    print(f"\n开始修复不完整配对...")

    for iteration in range(1, max_iterations + 1):
        paired_ids = df.loc[~df['交易对ID'].isin(unpaired_labels), '交易对ID']
        if len(paired_ids) == 0:
            break

        id_counts = paired_ids.value_counts()
        incomplete_pair_ids = id_counts[id_counts == 1].index.tolist()

        if len(incomplete_pair_ids) == 0:
            print(f" 迭代 {iteration}: 没有发现不完整配对,修复完成")
            break

        print(f" 迭代 {iteration}: 发现 {len(incomplete_pair_ids)} 个不完整配对")

        fixed_count = 0

        for pair_id in incomplete_pair_ids:
            incomplete_trade = df[df['交易对ID'] == pair_id].iloc[0]

            target = incomplete_trade['标的_完整']
            direction = incomplete_trade['方向']
            operation = incomplete_trade['仓位操作']
            trade_time = incomplete_trade['交易时间']
            trade_qty = incomplete_trade['成交数量_数值']

            # Recomputed for every pair because earlier repairs in this
            # pass may already have consumed some unpaired trades.
            unpaired_trades = df[
                (df['标的_完整'] == target)
                & (df['方向'] == direction)
                & df['交易对ID'].isin(unpaired_labels)
            ]
            if len(unpaired_trades) == 0:
                continue

            if operation == '开仓':
                # An open is completed by closes at or after it.
                matching_trades = unpaired_trades[
                    (unpaired_trades['仓位操作'] == '平仓')
                    & (unpaired_trades['交易时间'] >= trade_time)
                ].sort_values('交易时间')
            else:
                # A close is completed by opens at or before it,
                # preferring the most recent ones.
                matching_trades = unpaired_trades[
                    (unpaired_trades['仓位操作'] == '开仓')
                    & (unpaired_trades['交易时间'] <= trade_time)
                ].sort_values('交易时间', ascending=False)

            if len(matching_trades) == 0:
                continue

            matched_indices, remaining_qty = _greedy_exact_match(
                df, matching_trades.index, trade_qty
            )
            if not _is_exact_match(matched_indices, remaining_qty):
                continue

            for idx in matched_indices:
                df.loc[idx, '交易对ID'] = pair_id
            fixed_count += len(matched_indices)

            if operation == '开仓':
                print(f" 修复: {target} {direction} 1笔开仓({trade_qty:.0f}手) 匹配 {len(matched_indices)}笔平仓 -> {pair_id}")
            else:
                print(f" 修复: {target} {direction} 1笔平仓({trade_qty:.0f}手) 匹配 {len(matched_indices)}笔开仓 -> {pair_id}")

        if fixed_count == 0:
            print(f" 迭代 {iteration}: 无法修复更多配对,停止")
            break

        print(f" 迭代 {iteration}: 修复了 {fixed_count} 笔交易")

    # Final check: report pair IDs that are still singletons.
    paired_ids = df.loc[~df['交易对ID'].isin(unpaired_labels), '交易对ID']
    if len(paired_ids) > 0:
        id_counts = paired_ids.value_counts()
        incomplete_count = int((id_counts == 1).sum())
        if incomplete_count > 0:
            print(f"\n警告: 仍有 {incomplete_count} 个不完整配对无法修复")
        else:
            print(f"\n修复完成: 所有配对都已完整")

    return df


def pair_transactions(df):
    """
    Pair opening and closing trades and give each pair a shared ID.

    Trades are sorted by ``交易时间`` (stable sort) and processed per
    instrument (``标的_完整``) and direction (``多`` / ``空``):

    0. an open and a close with the same timestamp and equal quantity
       are paired one-to-one;
    1. for each unpaired close, unpaired opens at or before it are
       accumulated greedily; they are paired if they sum exactly to the
       close quantity (many opens, one close);
    2. for each unpaired open, unpaired closes at or after it are
       accumulated the same way (one open, many closes).

    Pair IDs have the form ``P0001``, ``P0002``, ... Trades that cannot
    be paired are labelled ``未配对``; this includes closes left over after
    all phases. Every phase consults the live pairing state, so a trade
    that already belongs to a pair is never reassigned. Finally
    ``fix_incomplete_pairs`` is applied as a safety net.

    Args:
        df (pandas.DataFrame): Parsed trades with ``标的_完整``, ``方向``,
            ``仓位操作``, ``成交数量_数值`` and ``交易时间`` columns.

    Returns:
        pandas.DataFrame: A time-sorted, re-indexed copy with the added
        ``交易对ID`` column.
    """
    df = df.copy()
    df['交易对ID'] = ''

    # A stable sort keeps the input order of trades sharing a timestamp,
    # which makes the pairing result deterministic.
    df = df.sort_values('交易时间', kind='mergesort').reset_index(drop=True)

    pair_id_counter = 1
    paired_count = 0
    unpaired_count = 0

    grouped = df.groupby('标的_完整')

    print(f"\n开始配对,共有 {len(grouped)} 个不同标的")

    for target, group in grouped:
        for direction in ['多', '空']:
            direction_group = group[group['方向'] == direction]
            if len(direction_group) == 0:
                continue

            # These frames are snapshots: use them only for static fields
            # (time, quantity); always read the pair ID from df itself.
            open_trades = direction_group[direction_group['仓位操作'] == '开仓']
            close_trades = direction_group[direction_group['仓位操作'] == '平仓']

            if len(open_trades) == 0 or len(close_trades) == 0:
                # Only one side exists, so nothing can be paired.
                for idx in direction_group.index:
                    df.loc[idx, '交易对ID'] = '未配对'
                    unpaired_count += 1
                continue

            # Phase 0: same timestamp, equal quantity, one open to one close.
            for _, time_group in direction_group.groupby('交易时间'):
                time_open_trades = time_group[time_group['仓位操作'] == '开仓']
                time_close_trades = time_group[time_group['仓位操作'] == '平仓']
                if len(time_open_trades) == 0 or len(time_close_trades) == 0:
                    continue

                for close_idx in time_close_trades.index:
                    if df.loc[close_idx, '交易对ID'] != '':
                        continue

                    close_qty = df.loc[close_idx, '成交数量_数值']
                    matching_open = time_open_trades[
                        (time_open_trades['成交数量_数值'] == close_qty)
                        & (df.loc[time_open_trades.index, '交易对ID'] == '')
                    ]
                    if len(matching_open) == 0:
                        continue

                    open_idx = matching_open.index[0]
                    pair_id = f'P{pair_id_counter:04d}'
                    df.loc[open_idx, '交易对ID'] = pair_id
                    df.loc[close_idx, '交易对ID'] = pair_id

                    paired_count += 2
                    pair_id_counter += 1
                    print(f" 同时间1开1平匹配: {target} {direction} {close_qty:.0f}手 -> {pair_id}")

            # Phase 1: several opens matched by a single close.
            for close_idx in close_trades.index:
                if df.loc[close_idx, '交易对ID'] != '':
                    continue

                close_time = df.loc[close_idx, '交易时间']
                close_qty = df.loc[close_idx, '成交数量_数值']

                valid_open_trades = open_trades[
                    (open_trades['交易时间'] <= close_time)
                    & (df.loc[open_trades.index, '交易对ID'] == '')
                ]
                paired_open_indices, remaining_qty = _greedy_exact_match(
                    df, valid_open_trades.index, close_qty
                )
                if not _is_exact_match(paired_open_indices, remaining_qty):
                    continue

                pair_id = f'P{pair_id_counter:04d}'
                for open_idx in paired_open_indices:
                    df.loc[open_idx, '交易对ID'] = pair_id
                df.loc[close_idx, '交易对ID'] = pair_id

                paired_count += len(paired_open_indices) + 1
                pair_id_counter += 1

                if len(paired_open_indices) > 1:
                    open_total = sum(
                        df.loc[idx, '成交数量_数值'] for idx in paired_open_indices
                    )
                    print(f" 多开1平匹配: {target} {direction} {len(paired_open_indices)}笔开仓({open_total:.0f}手) 匹配 1笔平仓({close_qty:.0f}手) -> {pair_id}")

            # Phase 2: a single open matched by several closes.
            for open_idx in open_trades.index:
                if df.loc[open_idx, '交易对ID'] != '':
                    continue

                open_time = df.loc[open_idx, '交易时间']
                open_qty = df.loc[open_idx, '成交数量_数值']

                # The pairing state is read from df, not from the
                # close_trades snapshot, so closes consumed by phase 0/1
                # or by an earlier open in this loop are excluded.
                valid_close_trades = close_trades[
                    (close_trades['交易时间'] >= open_time)
                    & (df.loc[close_trades.index, '交易对ID'] == '')
                ]
                paired_close_indices, remaining_qty = _greedy_exact_match(
                    df, valid_close_trades.index, open_qty
                )

                if _is_exact_match(paired_close_indices, remaining_qty):
                    pair_id = f'P{pair_id_counter:04d}'
                    df.loc[open_idx, '交易对ID'] = pair_id
                    for close_idx in paired_close_indices:
                        df.loc[close_idx, '交易对ID'] = pair_id

                    paired_count += len(paired_close_indices) + 1
                    pair_id_counter += 1

                    if len(paired_close_indices) > 1:
                        close_total = sum(
                            df.loc[idx, '成交数量_数值'] for idx in paired_close_indices
                        )
                        print(f" 1开多平匹配: {target} {direction} 1笔开仓({open_qty:.0f}手) 匹配 {len(paired_close_indices)}笔平仓({close_total:.0f}手) -> {pair_id}")
                    continue

                if len(paired_close_indices) > 0:
                    # Partial coverage only: warn and leave the open unpaired.
                    print(f" 警告: {target} {direction} 开仓在 {open_time} 有 {remaining_qty:.2f} 未配对,跳过配对")
                df.loc[open_idx, '交易对ID'] = '未配对'
                unpaired_count += 1

            # Closes that no phase could use are unpaired as well; label
            # and count them so the summary and statistics add up.
            for close_idx in close_trades.index:
                if df.loc[close_idx, '交易对ID'] == '':
                    df.loc[close_idx, '交易对ID'] = '未配对'
                    unpaired_count += 1

    print(f"\n配对完成:")
    print(f" 已配对交易: {paired_count} 条")
    print(f" 未配对交易: {unpaired_count} 条")
    print(f" 生成交易对: {pair_id_counter - 1} 对")

    # Post-processing: try to complete any pair ID used by a single row.
    df = fix_incomplete_pairs(df)

    return df
|
|
|
+
|
|
|
+
|
|
|
def _pair_sort_key(pair_id):
    """
    Build the ordering key ``(group, number, text)`` for a pair ID.

    IDs of the form ``P<digits>`` sort first (group 0) by their number;
    everything else (empty, missing, ``未配对`` or any other text) goes to
    group 1 and is ordered by its text.
    """
    if pd.isna(pair_id) or pair_id in ('', '未配对'):
        return 1, 0, ''
    if isinstance(pair_id, str) and pair_id.startswith('P'):
        try:
            return 0, int(pair_id[1:]), ''
        except ValueError:
            return 1, 0, pair_id
    return 1, 0, str(pair_id)


def save_result(df, output_path):
    """
    Write the pairing result to a CSV file (UTF-8 with BOM).

    Two columns are added before writing:

    * ``开仓时间`` - for each ``P...`` pair, the ``最后更新时间`` value of
      its first opening record (left blank when the pair has no opening
      record or the frame has no ``最后更新时间`` column);
    * ``交易盈亏`` - for each ``P...`` pair, the sum of ``平仓盈亏`` over
      its rows. Thousands separators are removed first and values that
      are not numeric count as 0; a missing ``平仓盈亏`` column gives 0.

    The intermediate parsing columns are dropped, ``交易对ID``,
    ``开仓时间`` and ``交易盈亏`` are moved to the end, and rows are ordered
    by pair number (non-paired rows last) and then by ``日期``.

    A failure while writing the file is reported on stdout and not
    raised.

    Args:
        df (pandas.DataFrame): Trades with ``交易对ID`` and ``仓位操作``
            columns. The frame itself is not modified.
        output_path (str): Destination CSV path.
    """
    # A fresh 0..n-1 index makes label-based assignment below safe even
    # if the caller's index contains duplicates; the index is not written.
    df = df.copy().reset_index(drop=True)

    df['开仓时间'] = ''
    df['交易盈亏'] = ''

    has_update_time = '最后更新时间' in df.columns

    # Convert the realised P&L once for the whole frame.
    if '平仓盈亏' in df.columns:
        cleaned = df['平仓盈亏'].astype(str).str.replace(',', '', regex=False)
        profit_loss = pd.to_numeric(cleaned, errors='coerce').fillna(0)
    else:
        profit_loss = pd.Series(0, index=df.index)

    # Row labels per pair ID, computed up front so that filling the new
    # columns does not interfere with the grouping.
    rows_by_pair = df.groupby('交易对ID', sort=False).groups

    for pair_id, rows in rows_by_pair.items():
        if not (isinstance(pair_id, str) and pair_id.startswith('P')):
            continue

        pair_records = df.loc[rows]
        open_records = pair_records[pair_records['仓位操作'] == '开仓']
        if has_update_time and len(open_records) > 0:
            df.loc[rows, '开仓时间'] = open_records.iloc[0]['最后更新时间']

        df.loc[rows, '交易盈亏'] = profit_loss.loc[rows].sum()

    # Drop intermediate columns and move the result columns to the end.
    intermediate_columns = ['标的_完整', '交易类型_标准', '仓位操作', '方向', '成交数量_数值', '交易时间']
    trailing_columns = ['交易对ID', '开仓时间', '交易盈亏']
    output_columns = [
        col for col in df.columns
        if col not in intermediate_columns and col not in trailing_columns
    ] + trailing_columns

    # Sort keys live in a separate frame of plain scalar columns rather
    # than in a column of tuples.
    sort_keys = pd.DataFrame(
        [_pair_sort_key(pair_id) for pair_id in df['交易对ID']],
        columns=['group', 'number', 'text'],
        index=df.index,
    )
    if '日期' in df.columns:
        sort_keys['date'] = pd.to_datetime(df['日期'], errors='coerce')
    else:
        sort_keys['date'] = pd.NaT

    row_order = sort_keys.sort_values(['group', 'number', 'text', 'date']).index
    df = df.loc[row_order].reset_index(drop=True)

    try:
        df[output_columns].to_csv(output_path, index=False, encoding='utf-8-sig')
        print(f"\n结果已保存到: {output_path}")

        file_size = os.path.getsize(output_path) / 1024  # KB
        print(f"文件大小: {file_size:.2f} KB")
    except Exception as e:
        # Deliberately best effort: report the problem and let the caller
        # carry on (e.g. to print statistics).
        print(f"保存文件时出错: {str(e)}")
|
|
|
+
|
|
|
+
|
|
|
def print_statistics(df):
    """
    Print a summary of the pairing result to stdout.

    Reports the total / paired / unpaired record counts with percentages
    (0.0% for an empty frame), the number of distinct pairs, the
    distribution of pair compositions and a per-instrument summary.

    Paired rows are those whose ``交易对ID`` starts with ``P``; unpaired
    rows are those labelled ``未配对``. When the ``仓位操作`` column is
    present, each composition is reported as "<opens>开<closes>平" counted
    from the actual rows; otherwise it is derived from the pair size on
    the assumption of a single opening record.

    Args:
        df (pandas.DataFrame): Trades with ``交易对ID`` and ``标的_完整``
            columns (and normally ``仓位操作``).
    """
    print("\n" + "=" * 60)
    print("配对统计信息")
    print("=" * 60)

    total = len(df)
    paired = df[df['交易对ID'].str.startswith('P', na=False)]
    unpaired = df[df['交易对ID'] == '未配对']

    def percent(part):
        """Share of all records in percent; 0.0 when there are none."""
        return part / total * 100 if total else 0.0

    print(f"\n总交易记录: {total} 条")
    print(f"已配对交易: {len(paired)} 条 ({percent(len(paired)):.1f}%)")
    print(f"未配对交易: {len(unpaired)} 条 ({percent(len(unpaired)):.1f}%)")

    unique_pairs = paired['交易对ID'].nunique()
    print(f"\n交易对数量: {unique_pairs} 对")

    if len(paired) > 0:
        pair_sizes = paired.groupby('交易对ID').size()
        print(f"\n交易对组成分布:")

        if '仓位操作' in paired.columns:
            # Count opens and closes per pair so that many-open/one-close
            # pairs are not reported as one-open/many-close.
            open_counts = (paired['仓位操作'] == '开仓').groupby(paired['交易对ID']).sum()
            composition = pd.DataFrame({
                'opens': open_counts.astype(int),
                'closes': (pair_sizes - open_counts).astype(int),
            })
            distribution = composition.groupby(['opens', 'closes']).size()
            for (n_open, n_close), freq in distribution.items():
                print(f" {n_open}开{n_close}平: {freq} 对")
        else:
            # Without the operation column only the pair size is known.
            distribution = pair_sizes.value_counts().sort_index()
            for count, freq in distribution.items():
                print(f" 1开{count - 1}平: {freq} 对")

    print(f"\n按标的统计:")
    for target, ids in df.groupby('标的_完整')['交易对ID']:
        paired_in_target = int(ids.str.startswith('P', na=False).sum())
        print(f" {target}: 总:{len(ids)}条, 已配对:{paired_in_target}条")
|
|
|
+
|
|
|
+
|
|
|
def analyze_transaction_pairs(csv_filename=None, output_filename=None):
    """
    Entry point: read, parse, pair, save and summarise a trade CSV.

    Both file names are joined onto the directory returned by
    ``_get_current_directory()``. The function prints its progress and
    returns ``None``; it returns early (after printing an error) when the
    input file does not exist, no rows could be read, or no valid rows
    remain after parsing.

    Args:
        csv_filename (str): Input CSV file name; defaults to
            ``transaction.csv``.
        output_filename (str): Output CSV file name (optional); defaults
            to ``<input stem>_paired_<YYYYmmdd_HHMMSS>.csv``.
    """
    banner = "=" * 60
    print(banner)
    print("交易配对分析工具")
    print(banner)

    # Resolve the input path.
    input_name = 'transaction.csv' if csv_filename is None else csv_filename
    base_dir = _get_current_directory()
    csv_path = os.path.join(base_dir, input_name)

    if not os.path.exists(csv_path):
        print(f"错误: 文件不存在 - {csv_path}")
        return

    # Resolve the output path; the default name carries a timestamp.
    if output_filename is None:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        stem = os.path.splitext(input_name)[0]
        output_name = f"{stem}_paired_{timestamp}.csv"
    else:
        output_name = output_filename
    output_path = os.path.join(base_dir, output_name)

    # Step 1: read the CSV.
    print(f"\n步骤1: 读取CSV文件")
    print(f"文件路径: {csv_path}")
    records = read_transaction_csv(csv_path)
    if len(records) == 0:
        print("错误: 无法读取数据")
        return

    # Step 2: parse the raw records.
    print(f"\n步骤2: 解析交易数据")
    records = parse_transaction_data(records)
    if len(records) == 0:
        print("错误: 没有有效的交易记录")
        return

    # Step 3: pair the trades.
    print(f"\n步骤3: 配对交易")
    records = pair_transactions(records)

    # Step 4: save the result.
    print(f"\n步骤4: 保存结果")
    save_result(records, output_path)

    # Step 5: print the statistics.
    print_statistics(records)

    print("\n" + banner)
    print("分析完成")
    print(banner)
|
|
|
+
|
|
|
+
|
|
|
# Usage example: running this file as a script analyses the default
# transaction.csv. To analyse a specific file, pass its name instead,
# e.g. analyze_transaction_pairs('transaction.csv').
if __name__ == "__main__":
    analyze_transaction_pairs()
|
|
|
+
|