1 mês atrás · f6b2c91188
--- a/Lib/future/__pycache__/transaction_pair_analysis.cpython-311.pyc
+++ b/Lib/future/__pycache__/transaction_pair_analysis.cpython-311.pyc
--- a/Lib/future/transaction_pair_analysis.py
+++ b/Lib/future/transaction_pair_analysis.py
@@ -455,6 +455,156 @@ def pair_transactions(df):
 
				     return df
			
 
				 
			
 
				 
			
 
				+def extract_symbol_core(symbol):
			
 
				+    """
			
 
				+    从标的字符串中提取标的核心字母
			
 
				+
			
 
				+    参数:
			
 
				+        symbol (str): 标的字符串，如"10年期国债期货(T2006.CCFX)"
			
 
				+
			
 
				+    返回:
			
 
				+        tuple: (括号内完整代码, 标的核心字母)
			
 
				+    """
			
 
				+    try:
			
 
				+        # 提取括号内的内容
			
 
				+        match = re.search(r'\(([^)]+)\)', symbol)
			
 
				+        if match:
			
 
				+            full_code = match.group(1)
			
 
				+            # 去掉后面的9位获取标的核心字母
			
 
				+            core_symbol = full_code[:-9] if len(full_code) > 9 else full_code
			
 
				+            return full_code, core_symbol
			
 
				+        else:
			
 
				+            return symbol, symbol
			
 
				+    except:
			
 
				+        return symbol, symbol
			
 
				+
			
 
				+
			
 
				+def identify_continuous_trade_pairs(df):
			
 
				+    """
			
 
				+    识别连续交易对
			
 
				+
			
 
				+    参数:
			
 
				+        df (pandas.DataFrame): 包含交易对ID的交易数据
			
 
				+
			
 
				+    返回:
			
 
				+        pandas.DataFrame: 添加了连续交易对ID的DataFrame
			
 
				+    """
			
 
				+    print("\n开始识别连续交易对...")
			
 
				+
			
 
				+    df = df.copy()
			
 
				+    df['连续交易对ID'] = 'N/A'  # 初始化连续交易对ID列
			
 
				+
			
 
				+    # 提取标的核心字母
			
 
				+    df['标的核心字母'] = df['标的'].apply(lambda x: extract_symbol_core(x)[1])
			
 
				+
			
 
				+    # 获取所有已配对的交易对ID
			
 
				+    paired_mask = df['交易对ID'].str.startswith('P', na=False)
			
 
				+    paired_df = df[paired_mask].copy()
			
 
				+
			
 
				+    if len(paired_df) == 0:
			
 
				+        print("没有已配对的交易")
			
 
				+        return df
			
 
				+
			
 
				+    # 按交易对ID分组
			
 
				+    pair_groups = paired_df.groupby('交易对ID')
			
 
				+
			
 
				+    # 存储连续交易对关系
			
 
				+    continuous_groups = []  # 每个元素是一组连续的交易对ID
			
 
				+    processed_pairs = set()  # 已处理的交易对ID
			
 
				+
			
 
				+    for pair_id, group in pair_groups:
			
 
				+        if pair_id in processed_pairs:
			
 
				+            continue
			
 
				+
			
 
				+        # 获取当前交易对的开仓和平仓记录
			
 
				+        open_trades = group[group['仓位操作'] == '开仓'].copy()
			
 
				+        close_trades = group[group['仓位操作'] == '平仓'].copy()
			
 
				+
			
 
				+        if len(open_trades) == 0 or len(close_trades) == 0:
			
 
				+            continue
			
 
				+
			
 
				+        # 获取当前交易对的关键信息
			
 
				+        current_core_symbol = group['标的核心字母'].iloc[0]
			
 
				+        current_direction = group['方向'].iloc[0]
			
 
				+        current_close_date = close_trades['日期'].iloc[0]
			
 
				+        current_close_time = close_trades['委托时间'].iloc[0]
			
 
				+        current_close_qty = close_trades['成交数量_数值'].sum()
			
 
				+
			
 
				+        # 查找匹配的连续交易对
			
 
				+        matching_pairs = []
			
 
				+
			
 
				+        for other_pair_id, other_group in pair_groups:
			
 
				+            if other_pair_id == pair_id or other_pair_id in processed_pairs:
			
 
				+                continue
			
 
				+
			
 
				+            # 获取另一个交易对的开仓和平仓记录
			
 
				+            other_open_trades = other_group[other_group['仓位操作'] == '开仓'].copy()
			
 
				+            other_close_trades = other_group[other_group['仓位操作'] == '平仓'].copy()
			
 
				+
			
 
				+            if len(other_open_trades) == 0 or len(other_close_trades) == 0:
			
 
				+                continue
			
 
				+
			
 
				+            # 检查条件1：平仓和开仓的日期、委托时间完全一致
			
 
				+            other_open_date = other_open_trades['日期'].iloc[0]
			
 
				+            other_open_time = other_open_trades['委托时间'].iloc[0]
			
 
				+
			
 
				+            # 由于可能有多个开仓，检查是否有至少一个与平仓时间完全一致
			
 
				+            time_match_found = False
			
 
				+            for _, open_trade in other_open_trades.iterrows():
			
 
				+                if (open_trade['日期'] == current_close_date and
			
 
				+                    open_trade['委托时间'] == current_close_time):
			
 
				+                    time_match_found = True
			
 
				+                    break
			
 
				+
			
 
				+            if not time_match_found:
			
 
				+                continue
			
 
				+
			
 
				+            # 检查条件2：交易类型匹配（平多对应开多，平空对应开空）
			
 
				+            other_direction = other_group['方向'].iloc[0]
			
 
				+            if current_direction != other_direction:
			
 
				+                continue
			
 
				+
			
 
				+            # 检查条件3：标的核心字母一致
			
 
				+            other_core_symbol = other_group['标的核心字母'].iloc[0]
			
 
				+            if current_core_symbol != other_core_symbol:
			
 
				+                continue
			
 
				+
			
 
				+            # 检查条件4：成交数量绝对值一致
			
 
				+            other_open_qty = other_open_trades['成交数量_数值'].sum()
			
 
				+            if abs(current_close_qty - other_open_qty) > 0.01:
			
 
				+                continue
			
 
				+
			
 
				+            # 所有条件都满足，这是一个连续交易对
			
 
				+            matching_pairs.append(other_pair_id)
			
 
				+            processed_pairs.add(other_pair_id)
			
 
				+
			
 
				+        # 如果找到匹配的连续交易对
			
 
				+        if matching_pairs:
			
 
				+            # 创建连续交易对组（包含当前交易对和所有匹配的交易对）
			
 
				+            continuous_group = [pair_id] + matching_pairs
			
 
				+            continuous_groups.append(continuous_group)
			
 
				+            processed_pairs.add(pair_id)
			
 
				+
			
 
				+            print(f"  发现连续交易对组: {continuous_group}")
			
 
				+            print(f"    核心标的: {current_core_symbol}, 方向: {current_direction}")
			
 
				+            print(f"    平仓时间: {current_close_date} {current_close_time}")
			
 
				+            print(f"    平仓数量: {current_close_qty:.2f}, 开仓数量: {other_open_qty:.2f}")
			
 
				+
			
 
				+    # 为连续交易对分配ID
			
 
				+    for i, continuous_group in enumerate(continuous_groups):
			
 
				+        continuous_id = f'C{i+1:04d}'
			
 
				+        for pair_id in continuous_group:
			
 
				+            mask = df['交易对ID'] == pair_id
			
 
				+            df.loc[mask, '连续交易对ID'] = continuous_id
			
 
				+
			
 
				+    print(f"\n识别完成，共发现 {len(continuous_groups)} 组连续交易对")
			
 
				+
			
 
				+    # 清理临时列
			
 
				+    df = df.drop('标的核心字母', axis=1)
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				 def save_result(df, output_path):
			
 
				     """
			
 
				     保存配对结果到CSV文件
			
@@ -504,15 +654,18 @@ def save_result(df, output_path):
 
				     # 移除中间处理列
			
 
				     columns_to_remove = ['标的_完整', '交易类型_标准', '仓位操作', '方向', '成交数量_数值', '交易时间']
			
 
				     output_columns = [col for col in df.columns if col not in columns_to_remove]
			
 
				-    
			
 
				-    # 调整列顺序，确保交易对ID、开仓时间和交易盈亏在最后
			
 
				+
			
 
				+    # 调整列顺序，确保交易对ID、连续交易对ID、开仓时间和交易盈亏在最后
			
 
				     if '交易对ID' in output_columns:
			
 
				         output_columns.remove('交易对ID')
			
 
				+    if '连续交易对ID' in output_columns:
			
 
				+        output_columns.remove('连续交易对ID')
			
 
				     if '开仓时间' in output_columns:
			
 
				         output_columns.remove('开仓时间')
			
 
				     if '交易盈亏' in output_columns:
			
 
				         output_columns.remove('交易盈亏')
			
 
				     output_columns.append('交易对ID')
			
 
				+    output_columns.append('连续交易对ID')
			
 
				     output_columns.append('开仓时间')
			
 
				     output_columns.append('交易盈亏')
			
 
				     
			
@@ -558,26 +711,40 @@ def save_result(df, output_path):
 
				 def print_statistics(df):
			
 
				     """
			
 
				     打印配对统计信息
			
 
				-    
			
 
				+
			
 
				     参数:
			
 
				         df (pandas.DataFrame): 包含交易对ID的DataFrame
			
 
				     """
			
 
				     print("\n" + "=" * 60)
			
 
				     print("配对统计信息")
			
 
				     print("=" * 60)
			
 
				-    
			
 
				+
			
 
				     # 统计已配对和未配对
			
 
				     paired = df[df['交易对ID'].str.startswith('P', na=False)]
			
 
				     unpaired = df[df['交易对ID'] == '未配对']
			
 
				-    
			
 
				+
			
 
				     print(f"\n总交易记录: {len(df)} 条")
			
 
				     print(f"已配对交易: {len(paired)} 条 ({len(paired)/len(df)*100:.1f}%)")
			
 
				     print(f"未配对交易: {len(unpaired)} 条 ({len(unpaired)/len(df)*100:.1f}%)")
			
 
				-    
			
 
				+
			
 
				     # 统计交易对数量
			
 
				     unique_pairs = paired['交易对ID'].nunique()
			
 
				     print(f"\n交易对数量: {unique_pairs} 对")
			
 
				-    
			
 
				+
			
 
				+    # 统计连续交易对
			
 
				+    if '连续交易对ID' in df.columns:
			
 
				+        continuous_pairs = df[df['连续交易对ID'] != 'N/A']
			
 
				+        unique_continuous_pairs = continuous_pairs['连续交易对ID'].nunique()
			
 
				+        print(f"\n连续交易对统计:")
			
 
				+        print(f"  连续交易对数量: {unique_continuous_pairs} 组")
			
 
				+        print(f"  涉及交易记录: {len(continuous_pairs)} 条")
			
 
				+        if len(continuous_pairs) > 0:
			
 
				+            # 统计每组连续交易对的交易对数量
			
 
				+            continuous_stats = continuous_pairs.groupby('连续交易对ID')['交易对ID'].nunique()
			
 
				+            print(f"  每组连续交易对包含的交易对数量分布:")
			
 
				+            for continuous_id, pair_count in continuous_stats.items():
			
 
				+                print(f"    {continuous_id}: {pair_count} 个交易对")
			
 
				+
			
 
				     # 统计每个交易对的平仓次数分布
			
 
				     if len(paired) > 0:
			
 
				         pair_counts = paired.groupby('交易对ID').size()
			
@@ -588,7 +755,7 @@ def print_statistics(df):
 
				                 print(f"  1开1平: {freq} 对")
			
 
				             else:
			
 
				                 print(f"  1开{count-1}平: {freq} 对")
			
 
				-    
			
 
				+
			
 
				     # 按标的统计
			
 
				     print(f"\n按标的统计:")
			
 
				     target_stats = df.groupby('标的_完整')['交易对ID'].apply(
			
@@ -649,14 +816,18 @@ def analyze_transaction_pairs(csv_filename=None, output_filename=None):
 
				     # 步骤3: 配对交易
			
 
				     print(f"\n步骤3: 配对交易")
			
 
				     df = pair_transactions(df)
			
 
				-    
			
 
				-    # 步骤4: 保存结果
			
 
				-    print(f"\n步骤4: 保存结果")
			
 
				+
			
 
				+    # 步骤4: 识别连续交易对
			
 
				+    print(f"\n步骤4: 识别连续交易对")
			
 
				+    df = identify_continuous_trade_pairs(df)
			
 
				+
			
 
				+    # 步骤5: 保存结果
			
 
				+    print(f"\n步骤5: 保存结果")
			
 
				     save_result(df, output_path)
			
 
				     
			
 
				-    # 步骤5: 打印统计信息
			
 
				+    # 步骤6: 打印统计信息
			
 
				     print_statistics(df)
			
 
				-    
			
 
				+
			
 
				     print("\n" + "=" * 60)
			
 
				     print("分析完成")
			
 
				     print("=" * 60)