|
|
@@ -455,6 +455,156 @@ def pair_transactions(df):
|
|
|
return df
|
|
|
|
|
|
|
|
|
def extract_symbol_core(symbol):
    """
    Extract the bracketed instrument code and its core letters from a symbol.

    Args:
        symbol (str): Instrument description such as
            "10年期国债期货(T2006.CCFX)".

    Returns:
        tuple: ``(full_code, core_symbol)``.
            ``full_code`` is the text inside the first pair of ASCII
            parentheses. ``core_symbol`` is ``full_code`` with its last nine
            characters removed when it is longer than nine characters
            (``"T2006.CCFX"`` -> ``"T"``); otherwise it equals ``full_code``.
            When ``symbol`` is not a string, or contains no non-empty
            parenthesised part, ``(symbol, symbol)`` is returned unchanged.
    """
    # Non-string values (None, NaN, numbers, ...) cannot be searched with a
    # str pattern. Hand them back untouched instead of relying on a bare
    # ``except`` that would also hide unrelated errors and interrupts.
    if not isinstance(symbol, str):
        return symbol, symbol

    # Grab the content of the first "(...)" group.
    match = re.search(r'\(([^)]+)\)', symbol)
    if match is None:
        return symbol, symbol

    full_code = match.group(1)
    # Strip the trailing nine characters (e.g. "2006.CCFX" in the example
    # above) to keep only the leading core letters.
    core_symbol = full_code[:-9] if len(full_code) > 9 else full_code
    return full_code, core_symbol
|
|
|
+
|
|
|
+
|
|
|
def _summarize_complete_pairs(paired_df):
    """Collect, once per trade pair, the facts needed for continuity matching.

    Only pairs that have at least one open ('开仓') row and at least one
    close ('平仓') row are included. Keys follow the groupby iteration order.
    """
    summaries = {}
    for pair_id, group in paired_df.groupby('交易对ID'):
        open_trades = group[group['仓位操作'] == '开仓']
        close_trades = group[group['仓位操作'] == '平仓']
        if open_trades.empty or close_trades.empty:
            continue

        summaries[pair_id] = {
            'core_symbol': group['标的核心字母'].iloc[0],
            'direction': group['方向'].iloc[0],
            # Only the first close row supplies the reference date/time.
            'close_date': close_trades['日期'].iloc[0],
            'close_time': close_trades['委托时间'].iloc[0],
            'close_qty': close_trades['成交数量_数值'].sum(),
            # Every open row is a candidate for the timestamp comparison.
            'open_stamps': list(zip(open_trades['日期'], open_trades['委托时间'])),
            'open_qty': open_trades['成交数量_数值'].sum(),
        }
    return summaries


def identify_continuous_trade_pairs(df):
    """
    Tag trade pairs that continue one another with a shared group ID.

    A pair B is treated as a continuation of pair A when all of these hold:
      1. at least one open row of B has the same '日期' and '委托时间' as
         the first close row of A;
      2. both pairs have the same '方向' (taken from their first row);
      3. both pairs have the same core symbol (see ``extract_symbol_core``);
      4. the summed close quantity of A and the summed open quantity of B
         differ by at most 0.01.

    Args:
        df (pandas.DataFrame): Trade records that already carry '交易对ID'.
            The columns '标的', '仓位操作', '方向', '日期', '委托时间' and
            '成交数量_数值' are read when paired trades exist.

    Returns:
        pandas.DataFrame: A copy of ``df`` with an added '连续交易对ID'
        column. Rows in a continuous group get 'C0001', 'C0002', ...; all
        other rows keep 'N/A'. The input frame is not modified.
    """
    print("\n开始识别连续交易对...")

    df = df.copy()
    df['连续交易对ID'] = 'N/A'  # default for rows outside any continuous group

    # Paired trades are the rows whose pair ID starts with 'P'.
    paired_mask = df['交易对ID'].str.startswith('P', na=False)
    if not paired_mask.any():
        print("没有已配对的交易")
        return df

    # The core symbol is a working column only, so it lives on the private
    # copy of the paired rows and never leaks into the returned frame.
    paired_df = df[paired_mask].copy()
    paired_df['标的核心字母'] = paired_df['标的'].apply(
        lambda x: extract_symbol_core(x)[1]
    )

    summaries = _summarize_complete_pairs(paired_df)

    continuous_groups = []   # each element is a list of pair IDs in one group
    processed_pairs = set()  # pair IDs already assigned to a group

    # NOTE(review): a pair absorbed as a continuation is skipped as a later
    # starting point, so a chain A -> B -> C is not merged into one group.
    # Kept as-is to preserve existing results; confirm whether this is wanted.
    for pair_id, current in summaries.items():
        if pair_id in processed_pairs:
            continue

        close_date = current['close_date']
        close_time = current['close_time']
        close_qty = current['close_qty']

        matching_pairs = []
        matched_open_qtys = []

        for other_pair_id, other in summaries.items():
            if other_pair_id == pair_id or other_pair_id in processed_pairs:
                continue

            # Condition 1: some open row shares the close date and time.
            opens_at_close = any(
                open_date == close_date and open_time == close_time
                for open_date, open_time in other['open_stamps']
            )
            if not opens_at_close:
                continue

            # Condition 2: same direction.
            if current['direction'] != other['direction']:
                continue

            # Condition 3: same core symbol.
            if current['core_symbol'] != other['core_symbol']:
                continue

            # Condition 4: quantities agree within a small tolerance.
            if abs(close_qty - other['open_qty']) > 0.01:
                continue

            matching_pairs.append(other_pair_id)
            matched_open_qtys.append(other['open_qty'])
            processed_pairs.add(other_pair_id)

        if matching_pairs:
            continuous_group = [pair_id] + matching_pairs
            continuous_groups.append(continuous_group)
            processed_pairs.add(pair_id)

            # Report the open quantities of the pairs that actually matched,
            # not of whichever candidate happened to be inspected last.
            open_qty_text = ', '.join(f"{qty:.2f}" for qty in matched_open_qtys)
            current_core_symbol = current['core_symbol']
            current_direction = current['direction']
            print(f" 发现连续交易对组: {continuous_group}")
            print(f" 核心标的: {current_core_symbol}, 方向: {current_direction}")
            print(f" 平仓时间: {close_date} {close_time}")
            print(f" 平仓数量: {close_qty:.2f}, 开仓数量: {open_qty_text}")

    # Groups are disjoint, so one membership mask per group is sufficient.
    for index, continuous_group in enumerate(continuous_groups, start=1):
        group_mask = df['交易对ID'].isin(continuous_group)
        df.loc[group_mask, '连续交易对ID'] = f'C{index:04d}'

    print(f"\n识别完成,共发现 {len(continuous_groups)} 组连续交易对")

    return df
|
|
|
+
|
|
|
+
|
|
|
def save_result(df, output_path):
|
|
|
"""
|
|
|
保存配对结果到CSV文件
|
|
|
@@ -504,15 +654,18 @@ def save_result(df, output_path):
|
|
|
# 移除中间处理列
|
|
|
columns_to_remove = ['标的_完整', '交易类型_标准', '仓位操作', '方向', '成交数量_数值', '交易时间']
|
|
|
output_columns = [col for col in df.columns if col not in columns_to_remove]
|
|
|
-
|
|
|
- # 调整列顺序,确保交易对ID、开仓时间和交易盈亏在最后
|
|
|
+
|
|
|
+ # 调整列顺序,确保交易对ID、连续交易对ID、开仓时间和交易盈亏在最后
|
|
|
if '交易对ID' in output_columns:
|
|
|
output_columns.remove('交易对ID')
|
|
|
+ if '连续交易对ID' in output_columns:
|
|
|
+ output_columns.remove('连续交易对ID')
|
|
|
if '开仓时间' in output_columns:
|
|
|
output_columns.remove('开仓时间')
|
|
|
if '交易盈亏' in output_columns:
|
|
|
output_columns.remove('交易盈亏')
|
|
|
output_columns.append('交易对ID')
|
|
|
+ output_columns.append('连续交易对ID')
|
|
|
output_columns.append('开仓时间')
|
|
|
output_columns.append('交易盈亏')
|
|
|
|
|
|
@@ -558,26 +711,40 @@ def save_result(df, output_path):
|
|
|
def print_statistics(df):
|
|
|
"""
|
|
|
打印配对统计信息
|
|
|
-
|
|
|
+
|
|
|
参数:
|
|
|
df (pandas.DataFrame): 包含交易对ID的DataFrame
|
|
|
"""
|
|
|
print("\n" + "=" * 60)
|
|
|
print("配对统计信息")
|
|
|
print("=" * 60)
|
|
|
-
|
|
|
+
|
|
|
# 统计已配对和未配对
|
|
|
paired = df[df['交易对ID'].str.startswith('P', na=False)]
|
|
|
unpaired = df[df['交易对ID'] == '未配对']
|
|
|
-
|
|
|
+
|
|
|
print(f"\n总交易记录: {len(df)} 条")
|
|
|
print(f"已配对交易: {len(paired)} 条 ({len(paired)/len(df)*100:.1f}%)")
|
|
|
print(f"未配对交易: {len(unpaired)} 条 ({len(unpaired)/len(df)*100:.1f}%)")
|
|
|
-
|
|
|
+
|
|
|
# 统计交易对数量
|
|
|
unique_pairs = paired['交易对ID'].nunique()
|
|
|
print(f"\n交易对数量: {unique_pairs} 对")
|
|
|
-
|
|
|
+
|
|
|
+ # 统计连续交易对
|
|
|
+ if '连续交易对ID' in df.columns:
|
|
|
+ continuous_pairs = df[df['连续交易对ID'] != 'N/A']
|
|
|
+ unique_continuous_pairs = continuous_pairs['连续交易对ID'].nunique()
|
|
|
+ print(f"\n连续交易对统计:")
|
|
|
+ print(f" 连续交易对数量: {unique_continuous_pairs} 组")
|
|
|
+ print(f" 涉及交易记录: {len(continuous_pairs)} 条")
|
|
|
+ if len(continuous_pairs) > 0:
|
|
|
+ # 统计每组连续交易对的交易对数量
|
|
|
+ continuous_stats = continuous_pairs.groupby('连续交易对ID')['交易对ID'].nunique()
|
|
|
+ print(f" 每组连续交易对包含的交易对数量分布:")
|
|
|
+ for continuous_id, pair_count in continuous_stats.items():
|
|
|
+ print(f" {continuous_id}: {pair_count} 个交易对")
|
|
|
+
|
|
|
# 统计每个交易对的平仓次数分布
|
|
|
if len(paired) > 0:
|
|
|
pair_counts = paired.groupby('交易对ID').size()
|
|
|
@@ -588,7 +755,7 @@ def print_statistics(df):
|
|
|
print(f" 1开1平: {freq} 对")
|
|
|
else:
|
|
|
print(f" 1开{count-1}平: {freq} 对")
|
|
|
-
|
|
|
+
|
|
|
# 按标的统计
|
|
|
print(f"\n按标的统计:")
|
|
|
target_stats = df.groupby('标的_完整')['交易对ID'].apply(
|
|
|
@@ -649,14 +816,18 @@ def analyze_transaction_pairs(csv_filename=None, output_filename=None):
|
|
|
# 步骤3: 配对交易
|
|
|
print(f"\n步骤3: 配对交易")
|
|
|
df = pair_transactions(df)
|
|
|
-
|
|
|
- # 步骤4: 保存结果
|
|
|
- print(f"\n步骤4: 保存结果")
|
|
|
+
|
|
|
+ # 步骤4: 识别连续交易对
|
|
|
+ print(f"\n步骤4: 识别连续交易对")
|
|
|
+ df = identify_continuous_trade_pairs(df)
|
|
|
+
|
|
|
+ # 步骤5: 保存结果
|
|
|
+ print(f"\n步骤5: 保存结果")
|
|
|
save_result(df, output_path)
|
|
|
|
|
|
- # 步骤5: 打印统计信息
|
|
|
+ # 步骤6: 打印统计信息
|
|
|
print_statistics(df)
|
|
|
-
|
|
|
+
|
|
|
print("\n" + "=" * 60)
|
|
|
print("分析完成")
|
|
|
print("=" * 60)
|