# 交易配对分析工具
# 用于从交易记录CSV文件中为开仓/平仓交易进行配对，为每对关联交易分配相同ID

import pandas as pd
import numpy as np
from datetime import datetime
import re
import os
import warnings
warnings.filterwarnings('ignore')


def _get_current_directory():
    """
    Return the directory that contains this source file.

    When ``__file__`` is not defined (``NameError``), the current working
    directory of the process is returned instead.

    Returns:
        str: Directory path.
    """
    try:
        module_path = os.path.abspath(__file__)
    except NameError:
        return os.getcwd()
    return os.path.dirname(module_path)


def read_transaction_csv(csv_path):
    """
    Read a transaction CSV file, trying several text encodings in turn.

    Strict UTF-8 is attempted first ("utf-8-sig" accepts files with or
    without a BOM). UTF-8 encoded Chinese text is very often also a *valid*
    GBK byte sequence, so trying GBK first can "succeed" and silently return
    garbled column names. "latin1" is last because it accepts any bytes.

    Parameters:
        csv_path (str): Path of the CSV file.

    Returns:
        pandas.DataFrame: The parsed records, or an empty DataFrame when
        every encoding failed with a decoding error.

    Raises:
        Exception: Re-raises whatever ``pandas.read_csv`` raised for the
        last encoding when that error was not a ``UnicodeDecodeError``.
    """
    # "utf-8-sig" covers plain UTF-8 and "gbk" covers GB2312, so those two
    # narrower codecs do not need their own attempts.
    encodings = ['utf-8-sig', 'gbk', 'gb18030', 'latin1']
    last_position = len(encodings) - 1

    for position, encoding in enumerate(encodings):
        try:
            df = pd.read_csv(csv_path, encoding=encoding)
        except UnicodeDecodeError:
            continue
        except Exception as e:
            # Other failures may still depend on the decoding, so keep
            # trying and only give up after the final encoding.
            if position == last_position:
                print(f"读取CSV文件时出错: {str(e)}")
                raise
            continue

        print(f"成功使用 {encoding} 编码读取CSV文件")
        print(f"从CSV文件中读取到 {len(df)} 条记录")
        return df

    print(f"无法使用任何编码格式读取CSV文件: {csv_path}")
    return pd.DataFrame()


def parse_transaction_data(df):
    """
    Derive the normalized columns used for pairing and drop unusable rows.

    Added columns:
        标的_完整      stripped text of 标的
        交易类型_标准  stripped text of 交易类型
        仓位操作       '开仓' / '平仓' / '未知', from the characters 开 / 平
        方向           '多' / '空' / '未知', from the characters 多 / 空
        成交数量_数值  absolute numeric value found in 成交数量
        交易时间       timestamp parsed from "日期 委托时间"

    Rows are kept only when the quantity is positive, the operation and the
    direction are recognized, and the timestamp could be parsed.

    Parameters:
        df (pandas.DataFrame): Raw transaction records.

    Returns:
        pandas.DataFrame: A filtered copy with the columns above added.
    """
    df = df.copy()

    df['标的_完整'] = df['标的'].astype(str).str.strip()
    df['交易类型_标准'] = df['交易类型'].astype(str).str.strip()

    def classify(text, rules):
        """Return the label of the first rule whose keyword occurs in text."""
        for keyword, label in rules:
            if keyword in text:
                return label
        return '未知'

    operation_rules = (('开', '开仓'), ('平', '平仓'))
    direction_rules = (('多', '多'), ('空', '空'))
    df['仓位操作'] = df['交易类型_标准'].apply(lambda x: classify(x, operation_rules))
    df['方向'] = df['交易类型_标准'].apply(lambda x: classify(x, direction_rules))

    quantity_pattern = re.compile(r'(-?\d+(?:\.\d+)?)')

    def extract_quantity(qty_str):
        """Return the absolute number in a quantity such as '1,200手'."""
        # Remove thousands separators first; otherwise the pattern would stop
        # at the comma and '1,200手' would be read as 1.
        text = str(qty_str).strip().replace(',', '').replace('，', '')
        match = quantity_pattern.search(text)
        if match is None:
            return 0.0
        return abs(float(match.group(1)))

    df['成交数量_数值'] = df['成交数量'].apply(extract_quantity)

    def parse_datetime(date_value, time_value):
        """Combine a date cell and a time cell; NaT when unparseable."""
        datetime_str = f"{str(date_value).strip()} {str(time_value).strip()}"
        try:
            return pd.to_datetime(datetime_str)
        except (ValueError, TypeError, OverflowError):
            return pd.NaT

    # Built from a list rather than df.apply(axis=1), which returns a
    # DataFrame (not a Series) when the frame has no rows.
    df['交易时间'] = pd.Series(
        [parse_datetime(d, t) for d, t in zip(df['日期'], df['委托时间'])],
        index=df.index,
    )

    valid_rows = (
        (df['成交数量_数值'] > 0)
        & (df['仓位操作'] != '未知')
        & (df['方向'] != '未知')
        & df['交易时间'].notna()
    )
    df = df[valid_rows].copy()

    print(f"解析后有效记录: {len(df)} 条")

    return df


def _unpaired_mask(pair_ids):
    """Return a boolean mask of the entries equal to '' or '未配对'."""
    return (pair_ids == '') | (pair_ids == '未配对')


def _greedy_exact_match(df, candidate_indices, target_qty):
    """
    Accumulate candidate trades, in the given order, towards target_qty.

    A candidate is taken when its quantity does not exceed what is still
    needed; larger candidates are skipped so the total never overshoots.

    Returns:
        tuple: (list of taken index labels, quantity still unmatched).
    """
    remaining_qty = target_qty
    matched_indices = []
    for idx in candidate_indices:
        if remaining_qty <= 0:
            break
        qty = df.loc[idx, '成交数量_数值']
        if qty <= remaining_qty:
            matched_indices.append(idx)
            remaining_qty -= qty
    return matched_indices, remaining_qty


def fix_incomplete_pairs(df):
    """
    Repair pairs whose ID occurs on exactly one row.

    For every such row, unpaired trades (ID '' or '未配对') with the same
    标的_完整 and 方向 are searched: later closes for a lone open, earlier
    opens (nearest first) for a lone close. They are added to the pair only
    when their quantities add up to the lone trade's quantity (within 0.01).
    The scan is repeated until nothing changes, at most 10 times.

    Parameters:
        df (pandas.DataFrame): Trades carrying a 交易对ID column.

    Returns:
        pandas.DataFrame: A copy with the repaired 交易对ID values.
    """
    df = df.copy()
    max_iterations = 10  # hard stop so the repair loop always terminates

    print("\n开始修复不完整配对...")

    for iteration in range(1, max_iterations + 1):
        assigned_ids = df.loc[~_unpaired_mask(df['交易对ID']), '交易对ID']
        if len(assigned_ids) == 0:
            break

        id_counts = assigned_ids.value_counts()
        incomplete_pair_ids = id_counts[id_counts == 1].index.tolist()

        if len(incomplete_pair_ids) == 0:
            print(f"  迭代 {iteration}: 没有发现不完整配对，修复完成")
            break

        print(f"  迭代 {iteration}: 发现 {len(incomplete_pair_ids)} 个不完整配对")

        fixed_count = 0

        for pair_id in incomplete_pair_ids:
            lone_trade = df[df['交易对ID'] == pair_id].iloc[0]

            target = lone_trade['标的_完整']
            direction = lone_trade['方向']
            operation = lone_trade['仓位操作']
            trade_time = lone_trade['交易时间']
            trade_qty = lone_trade['成交数量_数值']

            # Candidates: same instrument and direction, not yet in a pair.
            unpaired_trades = df[
                (df['标的_完整'] == target) &
                (df['方向'] == direction) &
                _unpaired_mask(df['交易对ID'])
            ]
            if len(unpaired_trades) == 0:
                continue

            if operation == '开仓':
                # A lone open needs closes at or after it, earliest first.
                matching_trades = unpaired_trades[
                    (unpaired_trades['仓位操作'] == '平仓') &
                    (unpaired_trades['交易时间'] >= trade_time)
                ].sort_values('交易时间')
            else:
                # A lone close needs opens at or before it, nearest first.
                matching_trades = unpaired_trades[
                    (unpaired_trades['仓位操作'] == '开仓') &
                    (unpaired_trades['交易时间'] <= trade_time)
                ].sort_values('交易时间', ascending=False)

            if len(matching_trades) == 0:
                continue

            matched_indices, remaining_qty = _greedy_exact_match(
                df, matching_trades.index, trade_qty
            )

            # Only an exact quantity match completes the pair.
            if len(matched_indices) > 0 and abs(remaining_qty) < 0.01:
                for idx in matched_indices:
                    df.loc[idx, '交易对ID'] = pair_id

                fixed_count += len(matched_indices)

                if operation == '开仓':
                    print(f"    修复: {target} {direction} 1笔开仓({trade_qty:.0f}手) 匹配 {len(matched_indices)}笔平仓 -> {pair_id}")
                else:
                    print(f"    修复: {target} {direction} 1笔平仓({trade_qty:.0f}手) 匹配 {len(matched_indices)}笔开仓 -> {pair_id}")

        if fixed_count == 0:
            print(f"  迭代 {iteration}: 无法修复更多配对，停止")
            break

        print(f"  迭代 {iteration}: 修复了 {fixed_count} 笔交易")

    # Final report on whatever is still incomplete.
    assigned_ids = df.loc[~_unpaired_mask(df['交易对ID']), '交易对ID']
    if len(assigned_ids) > 0:
        id_counts = assigned_ids.value_counts()
        incomplete_count = len(id_counts[id_counts == 1])
        if incomplete_count > 0:
            print(f"\n警告: 仍有 {incomplete_count} 个不完整配对无法修复")
        else:
            print("\n修复完成: 所有配对都已完整")

    return df


def pair_transactions(df):
    """
    Assign a shared 交易对ID ('P0001', 'P0002', ...) to matching trades.

    Trades are sorted by 交易时间 (stable, so equal times keep input order)
    and handled per 标的_完整 and 方向 in three phases:

        0. one open and one close at the same timestamp with equal quantity;
        1. several opens, not later than a close, summing to that close;
        2. several closes, not earlier than an open, summing to that open.

    A pair is created only when the quantities match (within 0.01). Every
    trade left without a partner is marked '未配对'. ``fix_incomplete_pairs``
    is run on the result before it is returned.

    Parameters:
        df (pandas.DataFrame): Output of ``parse_transaction_data``.

    Returns:
        pandas.DataFrame: Sorted copy with a 交易对ID column.
    """
    df = df.copy()
    df['交易对ID'] = ''

    # mergesort is stable: rows sharing a timestamp stay in file order.
    df = df.sort_values('交易时间', kind='mergesort').reset_index(drop=True)

    pair_id_counter = 1
    paired_count = 0
    unpaired_count = 0

    grouped = df.groupby('标的_完整')

    print(f"\n开始配对，共有 {len(grouped)} 个不同标的")

    for target, group in grouped:
        for direction in ['多', '空']:
            direction_group = group[group['方向'] == direction]
            if len(direction_group) == 0:
                continue

            # NOTE: these slices are snapshots taken while every ID is still
            # blank. Pairing state must always be read from df itself.
            open_trades = direction_group[direction_group['仓位操作'] == '开仓']
            close_trades = direction_group[direction_group['仓位操作'] == '平仓']

            if len(open_trades) == 0 or len(close_trades) == 0:
                # Only one side exists, so nothing can be paired.
                df.loc[direction_group.index, '交易对ID'] = '未配对'
                unpaired_count += len(direction_group)
                continue

            # Phase 0: equal-quantity open and close at the same timestamp.
            for _, time_group in direction_group.groupby('交易时间'):
                time_open_trades = time_group[time_group['仓位操作'] == '开仓']
                time_close_trades = time_group[time_group['仓位操作'] == '平仓']

                if len(time_open_trades) == 0 or len(time_close_trades) == 0:
                    continue

                for close_idx in time_close_trades.index:
                    if df.loc[close_idx, '交易对ID'] != '':
                        continue

                    close_qty = df.loc[close_idx, '成交数量_数值']

                    matching_open = time_open_trades[
                        (time_open_trades['成交数量_数值'] == close_qty) &
                        (df.loc[time_open_trades.index, '交易对ID'] == '')
                    ]

                    if len(matching_open) > 0:
                        open_idx = matching_open.index[0]
                        pair_id = f'P{pair_id_counter:04d}'
                        df.loc[open_idx, '交易对ID'] = pair_id
                        df.loc[close_idx, '交易对ID'] = pair_id

                        paired_count += 2
                        pair_id_counter += 1
                        print(f"  同时间1开1平匹配: {target} {direction} {close_qty:.0f}手 -> {pair_id}")

            # Phase 1: several opens closed by a single close.
            for close_idx in close_trades.index:
                if df.loc[close_idx, '交易对ID'] != '':
                    continue

                close_time = df.loc[close_idx, '交易时间']
                close_qty = df.loc[close_idx, '成交数量_数值']

                valid_open_trades = open_trades[
                    (open_trades['交易时间'] <= close_time) &
                    (df.loc[open_trades.index, '交易对ID'] == '')
                ]
                if len(valid_open_trades) == 0:
                    continue

                paired_open_indices, remaining_qty = _greedy_exact_match(
                    df, valid_open_trades.index, close_qty
                )

                if len(paired_open_indices) > 0 and abs(remaining_qty) < 0.01:
                    pair_id = f'P{pair_id_counter:04d}'
                    df.loc[paired_open_indices, '交易对ID'] = pair_id
                    df.loc[close_idx, '交易对ID'] = pair_id

                    paired_count += len(paired_open_indices) + 1
                    pair_id_counter += 1

                    if len(paired_open_indices) > 1:
                        print(f"  多开1平匹配: {target} {direction} {len(paired_open_indices)}笔开仓({sum([df.loc[idx, '成交数量_数值'] for idx in paired_open_indices]):.0f}手) 匹配 1笔平仓({close_qty:.0f}手) -> {pair_id}")

            # Phase 2: a single open closed by several closes.
            for open_idx in open_trades.index:
                if df.loc[open_idx, '交易对ID'] != '':
                    continue

                open_time = df.loc[open_idx, '交易时间']
                open_qty = df.loc[open_idx, '成交数量_数值']

                # The "still unpaired" test reads df, not the close_trades
                # snapshot; otherwise a close that already belongs to a pair
                # could be taken again and its first pair left with one leg.
                valid_close_trades = close_trades[
                    (close_trades['交易时间'] >= open_time) &
                    (df.loc[close_trades.index, '交易对ID'] == '')
                ]

                if len(valid_close_trades) == 0:
                    df.loc[open_idx, '交易对ID'] = '未配对'
                    unpaired_count += 1
                    continue

                paired_close_indices, remaining_qty = _greedy_exact_match(
                    df, valid_close_trades.index, open_qty
                )

                if len(paired_close_indices) > 0 and abs(remaining_qty) < 0.01:
                    pair_id = f'P{pair_id_counter:04d}'
                    df.loc[open_idx, '交易对ID'] = pair_id
                    df.loc[paired_close_indices, '交易对ID'] = pair_id

                    paired_count += len(paired_close_indices) + 1
                    pair_id_counter += 1

                    if len(paired_close_indices) > 1:
                        print(f"  1开多平匹配: {target} {direction} 1笔开仓({open_qty:.0f}手) 匹配 {len(paired_close_indices)}笔平仓({sum([df.loc[idx, '成交数量_数值'] for idx in paired_close_indices]):.0f}手) -> {pair_id}")
                elif len(paired_close_indices) > 0:
                    # Partial coverage only: warn and leave the open unpaired.
                    print(f"  警告: {target} {direction} 开仓在 {open_time} 有 {remaining_qty:.2f} 未配对，跳过配对")
                    df.loc[open_idx, '交易对ID'] = '未配对'
                    unpaired_count += 1
                else:
                    df.loc[open_idx, '交易对ID'] = '未配对'
                    unpaired_count += 1

            # Closes that no phase could match are unpaired as well; mark and
            # count them like every other unpaired trade.
            still_blank = df.loc[close_trades.index, '交易对ID'] == ''
            leftover_closes = still_blank[still_blank].index
            if len(leftover_closes) > 0:
                df.loc[leftover_closes, '交易对ID'] = '未配对'
                unpaired_count += len(leftover_closes)

    print("\n配对完成:")
    print(f"  已配对交易: {paired_count} 条")
    print(f"  未配对交易: {unpaired_count} 条")
    print(f"  生成交易对: {pair_id_counter - 1} 对")

    # Post-processing safety net for pair IDs that occur only once.
    df = fix_incomplete_pairs(df)

    return df


def extract_symbol_core(symbol):
    """
    Extract the bracketed code and its core prefix from an instrument name.

    The code is the text inside the first pair of parentheses, for example
    "T2006.CCFX" in "10年期国债期货(T2006.CCFX)". The core is that code
    without its last 9 characters ("T" in the example); a code of 9
    characters or fewer is returned unchanged as the core.

    Parameters:
        symbol (str): Instrument text such as "10年期国债期货(T2006.CCFX)".

    Returns:
        tuple: (bracketed code, core prefix). ``(symbol, symbol)`` when the
        text has no parentheses or ``symbol`` is not a string.
    """
    try:
        match = re.search(r'\(([^)]+)\)', symbol)
    except TypeError:
        # Non-string input (None, NaN, numbers) is passed through unchanged.
        return symbol, symbol

    if match is None:
        return symbol, symbol

    full_code = match.group(1)
    # NOTE(review): the fixed 9-character suffix presumably corresponds to a
    # 4-digit contract month + "." + 4-letter exchange code - confirm.
    core_symbol = full_code[:-9] if len(full_code) > 9 else full_code
    return full_code, core_symbol


def identify_continuous_trade_pairs(df):
    """
    Link trade pairs where one pair's close coincides with another's open.

    Pair B continues pair A when all of the following hold:
        * some open of B has the same 日期 and 委托时间 as A's first close;
        * both pairs have the same 方向;
        * both pairs have the same core symbol (see extract_symbol_core);
        * A's total closed quantity equals B's total opened quantity
          (within 0.01).

    Pairs are visited in 交易对ID order. A pair that has been placed in a
    group is not considered again, so each pair belongs to at most one group.
    Groups are numbered 'C0001', 'C0002', ... in discovery order.

    Parameters:
        df (pandas.DataFrame): Trades carrying a 交易对ID column.

    Returns:
        pandas.DataFrame: A copy with a 连续交易对ID column ('N/A' for rows
        that are not part of a continuous group).
    """
    print("\n开始识别连续交易对...")

    df = df.copy()
    df['连续交易对ID'] = 'N/A'

    # Temporary column; removed again on every return path.
    df['标的核心字母'] = df['标的'].apply(lambda x: extract_symbol_core(x)[1])

    paired_df = df[df['交易对ID'].str.startswith('P', na=False)]

    if len(paired_df) == 0:
        print("没有已配对的交易")
        return df.drop('标的核心字母', axis=1)

    # Summarize each pair once instead of re-filtering inside the pair loop.
    # Pairs lacking either an open or a close cannot take part and are left out.
    summaries = {}
    for pair_id, group in paired_df.groupby('交易对ID'):
        open_trades = group[group['仓位操作'] == '开仓']
        close_trades = group[group['仓位操作'] == '平仓']
        if len(open_trades) == 0 or len(close_trades) == 0:
            continue
        summaries[pair_id] = {
            'core': group['标的核心字母'].iloc[0],
            'direction': group['方向'].iloc[0],
            'close_date': close_trades['日期'].iloc[0],
            'close_time': close_trades['委托时间'].iloc[0],
            'close_qty': close_trades['成交数量_数值'].sum(),
            'open_qty': open_trades['成交数量_数值'].sum(),
            'open_stamps': list(zip(open_trades['日期'], open_trades['委托时间'])),
        }

    def is_continuation(current, other):
        """Return True when `other` opens exactly where `current` closes."""
        if current['direction'] != other['direction']:
            return False
        if current['core'] != other['core']:
            return False
        if abs(current['close_qty'] - other['open_qty']) > 0.01:
            return False
        return any(
            open_date == current['close_date'] and open_time == current['close_time']
            for open_date, open_time in other['open_stamps']
        )

    continuous_groups = []   # each element is a list of linked pair IDs
    processed_pairs = set()  # pair IDs already placed in a group

    for pair_id, current in summaries.items():
        if pair_id in processed_pairs:
            continue

        matching_pairs = [
            other_id
            for other_id, other in summaries.items()
            if other_id != pair_id
            and other_id not in processed_pairs
            and is_continuation(current, other)
        ]
        if not matching_pairs:
            continue

        processed_pairs.update(matching_pairs)
        processed_pairs.add(pair_id)

        continuous_group = [pair_id] + matching_pairs
        continuous_groups.append(continuous_group)

        # Report the opened quantity of the first matched pair.
        matched_open_qty = summaries[matching_pairs[0]]['open_qty']
        print(f"  发现连续交易对组: {continuous_group}")
        print(f"    核心标的: {current['core']}, 方向: {current['direction']}")
        print(f"    平仓时间: {current['close_date']} {current['close_time']}")
        print(f"    平仓数量: {current['close_qty']:.2f}, 开仓数量: {matched_open_qty:.2f}")

    for number, continuous_group in enumerate(continuous_groups, start=1):
        in_group = df['交易对ID'].isin(continuous_group)
        df.loc[in_group, '连续交易对ID'] = f'C{number:04d}'

    print(f"\n识别完成，共发现 {len(continuous_groups)} 组连续交易对")

    return df.drop('标的核心字母', axis=1)


def save_result(df, output_path):
    """
    Write the pairing result to a UTF-8 (with BOM) CSV file.

    Three columns are added before writing:
        开仓时间        最后更新时间 of the pair's first open row; left ''
                        when the pair has no open row or the frame has no
                        最后更新时间 column
        交易盈亏        sum of 平仓盈亏 over the rows of the pair
        连续交易总盈亏  sum of 平仓盈亏 over the rows of the continuous
                        group, 'N/A' outside any group

    The intermediate parsing columns are dropped, the five result columns are
    moved to the end, and rows are ordered by pair number, then by 日期, with
    unpaired rows last. A failure while writing is printed, not raised.

    Parameters:
        df (pandas.DataFrame): Trades carrying a 交易对ID column.
        output_path (str): Destination file path.
    """
    df = df.copy()

    df['开仓时间'] = ''
    df['交易盈亏'] = ''
    df['连续交易总盈亏'] = 'N/A'
    if '连续交易对ID' not in df.columns:
        # Allows saving a frame that skipped continuous-pair detection.
        df['连续交易对ID'] = 'N/A'

    def total_close_profit(records):
        """Sum 平仓盈亏 over records; unparseable cells count as 0."""
        if '平仓盈亏' not in records.columns:
            return 0
        # Cells may be text with thousands separators, e.g. "1,200.50".
        cleaned = records['平仓盈亏'].astype(str).str.replace(',', '', regex=False)
        return pd.to_numeric(cleaned, errors='coerce').fillna(0).sum()

    has_update_time = '最后更新时间' in df.columns

    for pair_id in df['交易对ID'].unique():
        if not (isinstance(pair_id, str) and pair_id.startswith('P')):
            continue

        pair_mask = df['交易对ID'] == pair_id
        pair_records = df[pair_mask]

        if has_update_time:
            open_records = pair_records[pair_records['仓位操作'] == '开仓']
            if len(open_records) > 0:
                df.loc[pair_mask, '开仓时间'] = open_records.iloc[0]['最后更新时间']

        df.loc[pair_mask, '交易盈亏'] = total_close_profit(pair_records)

    for continuous_id in df['连续交易对ID'].unique():
        if pd.isna(continuous_id) or continuous_id == 'N/A':
            continue
        continuous_mask = df['连续交易对ID'] == continuous_id
        df.loc[continuous_mask, '连续交易总盈亏'] = total_close_profit(df[continuous_mask])

    # Output layout: original columns first, result columns last.
    intermediate_columns = {'标的_完整', '交易类型_标准', '仓位操作', '方向', '成交数量_数值', '交易时间'}
    trailing_columns = ['交易对ID', '连续交易对ID', '开仓时间', '交易盈亏', '连续交易总盈亏']
    output_columns = [
        col for col in df.columns
        if col not in intermediate_columns and col not in trailing_columns
    ] + trailing_columns

    def get_sort_key(pair_id):
        """Return (rank, number, text): rank 0 for 'P<number>', else 1."""
        if pd.isna(pair_id) or pair_id == '' or pair_id == '未配对':
            return (1, 0, '')
        if isinstance(pair_id, str) and pair_id.startswith('P'):
            try:
                return (0, int(pair_id[1:]), '')
            except ValueError:
                return (1, 0, pair_id)
        return (1, 0, str(pair_id))

    # Scalar helper columns keep the sort independent of how pandas orders
    # tuple-valued cells.
    sort_keys = [get_sort_key(pair_id) for pair_id in df['交易对ID']]
    df['_sort_rank'] = [key[0] for key in sort_keys]
    df['_sort_number'] = [key[1] for key in sort_keys]
    df['_sort_text'] = [key[2] for key in sort_keys]

    if '日期' in df.columns:
        df['_sort_date'] = pd.to_datetime(df['日期'], errors='coerce')
    else:
        df['_sort_date'] = pd.NaT

    helper_columns = ['_sort_rank', '_sort_number', '_sort_text', '_sort_date']
    df = df.sort_values(helper_columns, ascending=True).reset_index(drop=True)
    df = df.drop(helper_columns, axis=1)

    try:
        df[output_columns].to_csv(output_path, index=False, encoding='utf-8-sig')
        print(f"\n结果已保存到: {output_path}")

        file_size = os.path.getsize(output_path) / 1024  # KB
        print(f"文件大小: {file_size:.2f} KB")
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"保存文件时出错: {str(e)}")


def print_statistics(df):
    """
    Print summary statistics about the pairing result.

    Reports the paired / unpaired row counts with percentages, the number of
    pairs, the continuous groups (when a 连续交易对ID column is present), how
    many pairs have each open/close composition, and per-instrument totals.

    Parameters:
        df (pandas.DataFrame): Trades carrying 交易对ID and 标的_完整 columns.
    """
    print("\n" + "=" * 60)
    print("配对统计信息")
    print("=" * 60)

    paired = df[df['交易对ID'].str.startswith('P', na=False)]
    unpaired = df[df['交易对ID'] == '未配对']
    total = len(df)

    def percent(part):
        """Share of all rows in percent; 0 for an empty frame."""
        return part / total * 100 if total else 0.0

    print(f"\n总交易记录: {total} 条")
    print(f"已配对交易: {len(paired)} 条 ({percent(len(paired)):.1f}%)")
    print(f"未配对交易: {len(unpaired)} 条 ({percent(len(unpaired)):.1f}%)")

    unique_pairs = paired['交易对ID'].nunique()
    print(f"\n交易对数量: {unique_pairs} 对")

    if '连续交易对ID' in df.columns:
        continuous_pairs = df[df['连续交易对ID'] != 'N/A']
        unique_continuous_pairs = continuous_pairs['连续交易对ID'].nunique()
        print("\n连续交易对统计:")
        print(f"  连续交易对数量: {unique_continuous_pairs} 组")
        print(f"  涉及交易记录: {len(continuous_pairs)} 条")
        if len(continuous_pairs) > 0:
            continuous_stats = continuous_pairs.groupby('连续交易对ID')['交易对ID'].nunique()
            print("  每组连续交易对包含的交易对数量分布:")
            for continuous_id, pair_count in continuous_stats.items():
                print(f"    {continuous_id}: {pair_count} 个交易对")

    if len(paired) > 0:
        print("\n交易对组成分布:")
        if '仓位操作' in paired.columns:
            # Count opens and closes separately: a pair may consist of
            # several opens and one close, not only one open and N closes.
            shape_counts = {}
            for _, group in paired.groupby('交易对ID'):
                shape = (
                    int((group['仓位操作'] == '开仓').sum()),
                    int((group['仓位操作'] == '平仓').sum()),
                )
                shape_counts[shape] = shape_counts.get(shape, 0) + 1
            ordered_shapes = sorted(shape_counts, key=lambda shape: (sum(shape), shape))
            for open_count, close_count in ordered_shapes:
                freq = shape_counts[(open_count, close_count)]
                print(f"  {open_count}开{close_count}平: {freq} 对")
        else:
            # Without the operation column only the pair size is known.
            distribution = paired.groupby('交易对ID').size().value_counts().sort_index()
            for count, freq in distribution.items():
                print(f"  1开{count - 1}平: {freq} 对")

    print("\n按标的统计:")
    for target, pair_ids in df.groupby('标的_完整')['交易对ID']:
        paired_rows = int(pair_ids.str.startswith('P', na=False).sum())
        print(f"  {target}: 总:{len(pair_ids)}条, 已配对:{paired_rows}条")


def analyze_transaction_pairs(csv_filename=None, output_filename=None):
    """
    Entry point: run the whole pairing analysis on one CSV file.

    Both file names are joined onto the directory returned by
    ``_get_current_directory``. The steps are: read, parse, pair, detect
    continuous pairs, save, print statistics. The function returns early
    (after printing an error) when the input file is missing, cannot be
    read, or contains no valid records.

    Parameters:
        csv_filename (str): Input CSV file name; 'transaction.csv' when None.
        output_filename (str): Output CSV file name (optional); when None it
            is "<input name without extension>_paired_<timestamp>.csv".
    """
    banner = "=" * 60
    print(banner)
    print("交易配对分析工具")
    print(banner)

    # Resolve the input path.
    csv_filename = 'transaction.csv' if csv_filename is None else csv_filename
    base_dir = _get_current_directory()
    csv_path = os.path.join(base_dir, csv_filename)

    if not os.path.exists(csv_path):
        print(f"错误: 文件不存在 - {csv_path}")
        return

    # Resolve the output path.
    if output_filename is None:
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        stem, _extension = os.path.splitext(csv_filename)
        output_filename = f"{stem}_paired_{stamp}.csv"
    output_path = os.path.join(base_dir, output_filename)

    # Step 1: read the CSV.
    print("\n步骤1: 读取CSV文件")
    print(f"文件路径: {csv_path}")
    records = read_transaction_csv(csv_path)
    if len(records) == 0:
        print("错误: 无法读取数据")
        return

    # Step 2: parse the raw records.
    print("\n步骤2: 解析交易数据")
    records = parse_transaction_data(records)
    if len(records) == 0:
        print("错误: 没有有效的交易记录")
        return

    # Step 3: pair opens with closes.
    print("\n步骤3: 配对交易")
    records = pair_transactions(records)

    # Step 4: detect continuous pairs.
    print("\n步骤4: 识别连续交易对")
    records = identify_continuous_trade_pairs(records)

    # Step 5: write the result file.
    print("\n步骤5: 保存结果")
    save_result(records, output_path)

    # Step 6: print the statistics.
    print_statistics(records)

    print("\n" + banner)
    print("分析完成")
    print(banner)


# Usage example
if __name__ == "__main__":
    # A CSV file name may be passed; without one the default transaction.csv is used
    analyze_transaction_pairs()
    # Or name a specific file: analyze_transaction_pairs('transaction.csv')