||
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- 期货开仓记录分析工具
- 分析 records.csv 中的期货交易数据,提供多维度统计分析
- """
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
- from datetime import datetime, timedelta
- import re
- import os
- import sys
# Font setup for Chinese chart labels: try the listed sans-serif families in
# order, and render minus signs with the ASCII hyphen instead of U+2212.
plt.rcParams.update({
    'font.sans-serif': ['Arial Unicode MS', 'SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
# Futures configuration table (copied from MAPatternStrategy_v002.py).
# Each compact row is (has_night_session, margin_rate, multiplier,
# trading_start_time); every instrument uses the same margin rate for the
# long and the short side, so it is stored once and expanded below.
_FUTURES_SPECS = {
    # Precious metals
    'AU': (True, 0.21, 1000, '21:00'),
    'AG': (True, 0.22, 15, '21:00'),

    # Non-ferrous metals
    'CU': (True, 0.15, 5, '21:00'),
    'AL': (True, 0.15, 5, '21:00'),
    'ZN': (True, 0.14, 5, '21:00'),
    'PB': (True, 0.14, 5, '21:00'),
    'NI': (True, 0.16, 1, '21:00'),
    'SN': (True, 0.17, 1, '21:00'),
    'SS': (True, 0.07, 5, '21:00'),

    # Ferrous chain
    'RB': (True, 0.13, 10, '21:00'),
    'HC': (True, 0.13, 10, '21:00'),
    'I': (True, 0.16, 100, '21:00'),
    'JM': (True, 0.17, 100, '21:00'),
    'J': (True, 0.25, 60, '21:00'),

    # Energy and chemicals
    'SP': (True, 0.13, 10, '21:00'),
    'FU': (True, 0.16, 10, '21:00'),
    'BU': (True, 0.17, 10, '21:00'),
    'RU': (True, 0.15, 10, '21:00'),
    'BR': (True, 0.15, 5, '21:00'),
    'SC': (True, 0.17, 1000, '21:00'),
    'NR': (True, 0.15, 10, '21:00'),
    'LU': (True, 0.17, 10, '21:00'),
    'LC': (False, 0.16, 1, '09:00'),

    # Chemicals
    'FG': (True, 0.16, 20, '21:00'),
    'TA': (True, 0.13, 5, '21:00'),
    'MA': (True, 0.12, 10, '21:00'),
    'SA': (True, 0.14, 20, '21:00'),
    'L': (True, 0.12, 5, '21:00'),
    'V': (True, 0.12, 5, '21:00'),
    'EG': (True, 0.12, 10, '21:00'),
    'PP': (True, 0.12, 5, '21:00'),
    'EB': (True, 0.12, 5, '21:00'),
    'PG': (True, 0.12, 20, '21:00'),
    'PX': (True, 0.1, 5, '21:00'),

    # Agricultural products
    'RM': (True, 0.13, 10, '21:00'),
    'OI': (True, 0.13, 10, '21:00'),
    'CF': (True, 0.13, 5, '21:00'),
    'SR': (True, 0.12, 10, '21:00'),
    'PF': (True, 0.13, 5, '21:00'),
    'C': (True, 0.12, 10, '21:00'),
    'CS': (True, 0.11, 10, '21:00'),
    'CY': (True, 0.11, 5, '21:00'),
    'A': (True, 0.12, 10, '21:00'),
    'B': (True, 0.12, 10, '21:00'),
    'M': (True, 0.12, 10, '21:00'),
    'Y': (True, 0.12, 10, '21:00'),
    'P': (True, 0.13, 10, '21:00'),

    # Remaining instruments. This section was originally headed
    # "no night session", but several rows below (MO, LG, BC, SH, AO, PR)
    # are configured with a night session.
    'IF': (False, 0.17, 300, '09:30'),
    'IH': (False, 0.15, 300, '09:30'),
    'IC': (False, 0.17, 200, '09:30'),
    'IM': (False, 0.17, 200, '09:30'),
    'AP': (False, 0.16, 10, '09:00'),
    'CJ': (False, 0.14, 5, '09:00'),
    'PK': (False, 0.13, 5, '09:00'),
    'JD': (False, 0.11, 10, '09:00'),
    'LH': (False, 0.13, 16, '09:00'),
    'T': (False, 0.03, 1000000, '09:30'),
    'PS': (False, 0.16, 3, '09:00'),
    'UR': (False, 0.14, 20, '09:00'),
    'MO': (True, 0.17, 100, '21:00'),
    'HO': (False, 0.15, 100, '09:30'),
    'LG': (True, 0.13, 90, '21:00'),
    'EC': (False, 0.23, 50, '09:00'),
    'OP': (False, 0.13, 40, '09:00'),
    'BC': (True, 0.15, 5, '21:00'),
    'SH': (True, 0.13, 30, '21:00'),
    'TS': (False, 0.015, 2000000, '09:30'),
    'AD': (False, 0.13, 10, '09:00'),
    'PL': (False, 0.12, 20, '09:00'),
    'SI': (False, 0.15, 5, '09:00'),
    'SM': (False, 0.15, 5, '09:00'),
    'AO': (True, 0.17, 20, '21:00'),
    'TL': (False, 0.045, 1000000, '09:00'),
    'SF': (False, 0.14, 5, '09:00'),
    'PR': (True, 0.12, 15, '21:00'),
    'TF': (False, 0.022, 1000000, '09:00'),
    'BZ': (False, 0.13, 30, '09:00'),
}


def _expand_futures_spec(spec):
    """Expand one compact row into the nested per-instrument config dict."""
    has_night_session, margin, multiplier, start_time = spec
    return {
        'has_night_session': has_night_session,
        'margin_rate': {'long': margin, 'short': margin},
        'multiplier': multiplier,
        'trading_start_time': start_time,
    }


# Product code -> configuration dict, in the same order as the table above.
FUTURES_CONFIG = {
    code: _expand_futures_spec(spec) for code, spec in _FUTURES_SPECS.items()
}
def extract_symbol_code(target_str):
    """
    Extract the product code from a contract description.

    Example: '原油2002(SC2002.XINE)' -> 'SC'

    Args:
        target_str: Contract description. The code is the run of uppercase
            letters that directly follows '(' and is followed by digits
            and a '.'.

    Returns:
        The product code as a string, or None when the value is not a
        string (for example NaN from a blank CSV cell) or nothing matches.
    """
    # Blank CSV cells arrive as float NaN; re.search would raise TypeError.
    if not isinstance(target_str, str):
        return None
    match = re.search(r'\(([A-Z]+)\d+\.', target_str)
    return match.group(1) if match else None
def calculate_time_segment(order_time_str, trading_start_time_str):
    """
    Classify how long after the session open an order was placed.

    Args:
        order_time_str: Order time formatted as 'HH:MM:SS'.
        trading_start_time_str: Session start formatted as 'HH:MM'
            (e.g. '21:00' for instruments with a night session).

    Returns:
        '<30分钟', '30-60分钟' or '>1小时'; '未知' when either argument
        cannot be parsed (a diagnostic line is printed in that case).
    """
    try:
        order_time = datetime.strptime(order_time_str, '%H:%M:%S')
        start_time = datetime.strptime(trading_start_time_str, '%H:%M')
    except (TypeError, ValueError) as e:
        # TypeError: non-string input (e.g. NaN); ValueError: wrong format.
        print(f"时间计算错误: {order_time_str}, {trading_start_time_str}, {e}")
        return '未知'

    # Both values are clock times without a date. Wrapping the difference
    # around 24 hours measures an order placed "before" the open from the
    # previous day's open, which covers the 21:00 night-session case where
    # the order is placed after midnight or during the following day.
    seconds_since_open = (order_time - start_time).total_seconds() % (24 * 60 * 60)
    time_diff = seconds_since_open / 60

    if time_diff < 30:
        return '<30分钟'
    if time_diff < 60:
        return '30-60分钟'
    return '>1小时'
def calculate_session_type(order_time_str, has_night_session):
    """
    Classify an order time into a trading session.

    Args:
        order_time_str: Order time formatted as 'HH:MM:SS'.
        has_night_session: Whether the instrument trades at night; the
            night label is only used when this is true.

    Returns:
        '夜盘' (hour >= 21 or hour < 3, night-session instruments only),
        '上午' (09:00-11:59), '下午' (12:00-15:59), '其他' for any other
        hour, or '未知' when the time cannot be parsed.
    """
    try:
        hour = datetime.strptime(order_time_str, '%H:%M:%S').hour
    except (TypeError, ValueError):
        # Only parsing failures map to '未知'; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        return '未知'

    if has_night_session and (hour >= 21 or hour < 3):
        return '夜盘'
    if 9 <= hour < 12:
        return '上午'
    if 12 <= hour < 16:
        return '下午'
    return '其他'
def load_and_preprocess_data(csv_path):
    """
    Load the trade records CSV and add the derived analysis columns.

    The CSV is expected to provide the columns '标的', '委托时间',
    '交易盈亏', '保证金', '成交额' and 'crossed_ma_lines'.

    Added columns:
        品种代码: product code extracted from '标的'.
        trading_start_time / has_night_session: looked up in FUTURES_CONFIG
            (None / False for unknown codes).
        开盘后时间段: time bucket relative to the session open.
        交易时段: night / morning / afternoon session label.
        保证金收益率: profit or loss as a percentage of margin.
        穿越均线数量: number of ';'-separated entries in 'crossed_ma_lines'.
        是否盈利: True when '交易盈亏' is positive.
        成交额分组: turnover bucket.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The DataFrame with the columns above added.
    """
    print("正在加载数据...")
    df = pd.read_csv(csv_path)

    print(f"原始数据行数: {len(df)}")
    print(f"数据列: {df.columns.tolist()}")

    # Product code and its static configuration.
    df['品种代码'] = df['标的'].apply(extract_symbol_code)
    df['trading_start_time'] = df['品种代码'].apply(
        lambda code: FUTURES_CONFIG.get(code, {}).get('trading_start_time', None)
    )
    df['has_night_session'] = df['品种代码'].apply(
        lambda code: FUTURES_CONFIG.get(code, {}).get('has_night_session', False)
    )

    # Plain list comprehensions instead of DataFrame.apply(axis=1): the
    # row-wise apply returns a DataFrame (not a Series) for an empty frame,
    # which cannot be assigned to a column.
    df['开盘后时间段'] = [
        calculate_time_segment(order_time, start_time) if pd.notna(start_time) else '未知'
        for order_time, start_time in zip(df['委托时间'], df['trading_start_time'])
    ]
    df['交易时段'] = [
        calculate_session_type(order_time, night)
        for order_time, night in zip(df['委托时间'], df['has_night_session'])
    ]

    # A zero margin would yield +/-inf, which then dominates every group
    # mean; treat it as missing instead.
    margin = df['保证金'].replace(0, np.nan)
    df['保证金收益率'] = (df['交易盈亏'] / margin) * 100

    # str() guards against the column being parsed as numeric when every
    # row holds a single bare number.
    df['穿越均线数量'] = df['crossed_ma_lines'].apply(
        lambda x: len(str(x).split(';')) if pd.notna(x) else 0
    )

    df['是否盈利'] = df['交易盈亏'] > 0

    # Bins are right-closed: (0, 100000], (100000, 200000], ...
    df['成交额分组'] = pd.cut(df['成交额'],
                          bins=[0, 100000, 200000, 500000, float('inf')],
                          labels=['<10万', '10-20万', '20-50万', '>50万'])

    total_rows = len(df)
    extracted_pct = df['品种代码'].notna().sum() / total_rows * 100 if total_rows else 0.0
    print(f"预处理后数据行数: {total_rows}")
    print(f"品种代码提取成功率: {extracted_pct:.2f}%")

    return df
def calculate_statistics(group_df):
    """
    Compute summary statistics for one group of trades.

    Args:
        group_df: DataFrame with the columns '交易盈亏' and '保证金收益率'.

    Returns:
        pd.Series with:
            出现次数: number of rows.
            胜率: share of rows with positive '交易盈亏' (0 for an empty group).
            平均盈亏: mean of '交易盈亏'.
            盈亏比: average winning trade divided by the absolute average
                of the non-winning trades (break-even trades count on the
                losing side). inf when there are winners but the average
                loss is zero; NaN when there are no winners either.
            平均保证金收益率: mean of '保证金收益率'.
    """
    total_count = len(group_df)
    pnl = group_df['交易盈亏']
    winners = pnl[pnl > 0]
    losers = pnl[pnl <= 0]

    win_rate = len(winners) / total_count if total_count > 0 else 0

    avg_profit = winners.mean() if len(winners) > 0 else 0
    avg_loss = abs(losers.mean()) if len(losers) > 0 else 0

    if avg_loss > 0:
        profit_loss_ratio = avg_profit / avg_loss
    elif avg_profit > 0:
        profit_loss_ratio = np.inf
    else:
        # 0 / 0: neither gains nor losses, so the ratio is undefined
        # rather than infinitely good.
        profit_loss_ratio = np.nan

    return pd.Series({
        '出现次数': total_count,
        '胜率': win_rate,
        '平均盈亏': pnl.mean(),
        '盈亏比': profit_loss_ratio,
        '平均保证金收益率': group_df['保证金收益率'].mean()
    })
def analyze_ma_lines(df):
    """
    Print and return statistics grouped by 'crossed_ma_lines'.

    Returns:
        DataFrame of per-group statistics rounded to 4 decimals and sorted
        by '出现次数' in descending order.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("均线组合分析")
    print(rule)

    per_combo = df.groupby('crossed_ma_lines').apply(calculate_statistics)
    ma_stats = per_combo.round(4).sort_values('出现次数', ascending=False)

    print(ma_stats.to_string())
    return ma_stats
def analyze_time_segment(df):
    """
    Print and return statistics grouped by '开盘后时间段'.

    Returns:
        DataFrame of per-group statistics rounded to 4 decimals, with rows
        in the fixed order '<30分钟', '30-60分钟', '>1小时', '未知'
        (labels that do not occur are omitted).
    """
    rule = "=" * 80
    print("\n" + rule)
    print("开盘后时间段分析")
    print(rule)

    time_stats = df.groupby('开盘后时间段').apply(calculate_statistics).round(4)

    # Keep only the labels that actually occur, in display order.
    present_labels = []
    for label in ('<30分钟', '30-60分钟', '>1小时', '未知'):
        if label in time_stats.index:
            present_labels.append(label)
    time_stats = time_stats.reindex(present_labels)

    print(time_stats.to_string())
    return time_stats
def analyze_cross_dimension(df):
    """
    Cross analysis of 'crossed_ma_lines' (rows) by '开盘后时间段' (columns).

    Prints and returns four tables: sample counts, win rate, mean
    '交易盈亏' and mean '保证金收益率'.

    Returns:
        Tuple (cross_count, cross_winrate, cross_profit, cross_return).
    """
    rule = "=" * 80
    print("\n" + rule)
    print("交叉分析:均线组合 × 开盘后时间段")
    print(rule)

    ma_combo = df['crossed_ma_lines']
    segment = df['开盘后时间段']

    def mean_table(value_column, decimals):
        # Mean of one value column per (MA combination, time segment) cell.
        table = pd.crosstab(ma_combo, segment, values=df[value_column], aggfunc='mean')
        return table.round(decimals)

    cross_count = pd.crosstab(ma_combo, segment)
    print("\n样本量分布:")
    print(cross_count.to_string())

    cross_winrate = mean_table('是否盈利', 4)
    print("\n胜率对比:")
    print(cross_winrate.to_string())

    cross_profit = mean_table('交易盈亏', 2)
    print("\n平均盈亏:")
    print(cross_profit.to_string())

    cross_return = mean_table('保证金收益率', 4)
    print("\n平均保证金收益率(%):")
    print(cross_return.to_string())

    return cross_count, cross_winrate, cross_profit, cross_return
def analyze_trade_type_and_variety(df):
    """
    Print and return statistics by trade type, variety and product code.

    Returns:
        Tuple (trade_type_stats, variety_stats, symbol_stats). All are
        rounded to 4 decimals; symbol_stats holds only the 20 product codes
        with the highest '出现次数'.
    """
    def heading(title):
        print("\n" + "="*80)
        print(title)
        print("="*80)

    def stats_by(column):
        return df.groupby(column).apply(calculate_statistics).round(4)

    heading("交易类型分析")
    trade_type_stats = stats_by('交易类型')
    print(trade_type_stats.to_string())

    heading("品种类型分析")
    variety_stats = stats_by('品种')
    print(variety_stats.to_string())

    heading("具体品种代码分析(前20名)")
    by_symbol = stats_by('品种代码')
    symbol_stats = by_symbol.sort_values('出现次数', ascending=False).head(20)
    print(symbol_stats.to_string())

    return trade_type_stats, variety_stats, symbol_stats
def analyze_additional_dimensions(df):
    """
    Print and return statistics for the remaining analysis dimensions.

    Sections, in order: turnover bucket, trading session, number of crossed
    MA lines, long/short comparison for the 10 most frequent MA
    combinations, night-session vs. day-only instruments, and the
    three-way combination (MA combination x time segment x trade type)
    restricted to groups with at least 10 samples.

    Returns:
        Dict with the keys 'amount_stats', 'session_stats',
        'ma_count_stats', 'long_short_stats', 'night_session_stats' and
        'combo_stats' (the latter already filtered to >= 10 samples).
    """
    def heading(title):
        print("\n" + "="*80)
        print(title)
        print("="*80)

    def stats_by(frame, keys):
        return frame.groupby(keys).apply(calculate_statistics).round(4)

    heading("成交额分组分析")
    amount_stats = stats_by(df, '成交额分组')
    print(amount_stats.to_string())

    heading("交易时段分析")
    session_stats = stats_by(df, '交易时段')
    print(session_stats.to_string())

    heading("穿越均线数量分析")
    ma_count_stats = stats_by(df, '穿越均线数量')
    print(ma_count_stats.to_string())

    heading("多空对比(按均线组合)- 前10个组合")
    # Restrict to the 10 most frequent MA combinations.
    top_ma_lines = df['crossed_ma_lines'].value_counts().head(10).index
    df_top = df[df['crossed_ma_lines'].isin(top_ma_lines)]
    long_short_stats = stats_by(df_top, ['crossed_ma_lines', '交易类型'])
    print(long_short_stats.to_string())

    heading("品种特性分析(有夜盘 vs 无夜盘)")
    night_session_stats = stats_by(df, 'has_night_session')
    # Label each row from its own flag. Assigning a fixed two-item list
    # raises a length-mismatch error when only one of the groups occurs.
    night_session_stats.index = [
        '有夜盘' if flag else '无夜盘' for flag in night_session_stats.index
    ]
    print(night_session_stats.to_string())

    heading("组合策略分析:最佳组合(样本量>=10)")
    combo_stats = stats_by(df, ['crossed_ma_lines', '开盘后时间段', '交易类型'])
    # Small groups are too noisy to rank.
    combo_stats = combo_stats[combo_stats['出现次数'] >= 10]

    combo_stats_sorted = combo_stats.sort_values('平均保证金收益率', ascending=False).head(10)
    print("\n保证金收益率最高的10个组合:")
    print(combo_stats_sorted.to_string())

    combo_stats_sorted_winrate = combo_stats.sort_values('胜率', ascending=False).head(10)
    print("\n胜率最高的10个组合:")
    print(combo_stats_sorted_winrate.to_string())

    return {
        'amount_stats': amount_stats,
        'session_stats': session_stats,
        'ma_count_stats': ma_count_stats,
        'long_short_stats': long_short_stats,
        'night_session_stats': night_session_stats,
        'combo_stats': combo_stats
    }
def create_visualizations(df, ma_stats, time_stats, cross_winrate, cross_profit, cross_return, output_dir):
    """
    Render the analysis charts as PNG files under `output_dir`.

    Four figures are written: ma_lines_analysis.png,
    time_segment_analysis.png, cross_analysis_heatmap.png and
    variety_analysis.png.

    Args:
        df: Preprocessed trade records; grouped here by '交易类型' and '品种'.
        ma_stats: Statistics indexed by MA combination; the first 15 rows
            are plotted.
        time_stats: Statistics indexed by time segment.
        cross_winrate: Win-rate table, MA combination x time segment.
        cross_profit: Mean profit/loss table with the same layout.
        cross_return: Mean margin-return table with the same layout.
        output_dir: Target directory; created when missing.
    """
    print("\n" + "="*80)
    print("生成可视化图表...")
    print("="*80)

    os.makedirs(output_dir, exist_ok=True)

    def label_offset(extents, offset_ratio):
        # Gap between a bar end and its label, proportional to the largest
        # bar magnitude. A fixed minimum gap would be far too large on
        # small-valued axes such as win rate (0-1), and using the signed
        # maximum would collapse the gap when every bar is negative.
        magnitudes = [abs(extent) for extent in extents if not np.isnan(extent)]
        return max(magnitudes, default=0) * offset_ratio

    def annotate_barh(ax, bars, formatter=lambda v: f"{v:.0f}", offset_ratio=0.01):
        """Write each bar's value next to the end of a horizontal bar."""
        if bars is None or len(bars) == 0:
            return
        offset = label_offset([bar.get_width() for bar in bars], offset_ratio)
        for bar in bars:
            width = bar.get_width()
            if np.isnan(width):
                continue
            # Negative bars grow to the left, so their label goes left too.
            if width < 0:
                ha, x = 'right', width - offset
            else:
                ha, x = 'left', width + offset
            y = bar.get_y() + bar.get_height() / 2
            ax.text(x, y, formatter(width), va='center', ha=ha, fontsize=9)

    def annotate_bar(ax, bars, formatter=lambda v: f"{v:.0f}", offset_ratio=0.01):
        """Write each bar's value above (or below) a vertical bar."""
        if bars is None or len(bars) == 0:
            return
        offset = label_offset([bar.get_height() for bar in bars], offset_ratio)
        for bar in bars:
            height = bar.get_height()
            if np.isnan(height):
                continue
            if height < 0:
                va, y = 'top', height - offset
            else:
                va, y = 'bottom', height + offset
            x = bar.get_x() + bar.get_width() / 2
            ax.text(x, y, formatter(height), va=va, ha='center', fontsize=9)

    def save_figure(filename):
        """Lay out, save and close the current figure."""
        target = os.path.join(output_dir, filename)
        plt.tight_layout()
        plt.savefig(target, dpi=150, bbox_inches='tight')
        print(f"已保存: {target}")
        plt.close()

    # 1. MA combination comparison (first 15 rows of ma_stats).
    _, axes = plt.subplots(2, 2, figsize=(16, 12))
    top_ma = ma_stats.head(15)
    positions = range(len(top_ma))
    # (axis, column, x label, title, reference value, label format, offset ratio)
    # The reference value drives both the green/red colouring and the
    # dashed guide line; None means plain bars without a guide line.
    ma_panels = [
        (axes[0, 0], '出现次数', '出现次数', '均线组合出现次数(Top 15)',
         None, lambda v: f"{v:.0f}", 0.01),
        (axes[0, 1], '胜率', '胜率', '均线组合胜率(Top 15)',
         0.5, lambda v: f"{v:.1%}", 0.02),
        (axes[1, 0], '平均盈亏', '平均盈亏(元)', '均线组合平均盈亏(Top 15)',
         0, lambda v: f"{v:,.0f}", 0.015),
        (axes[1, 1], '平均保证金收益率', '平均保证金收益率(%)', '均线组合平均保证金收益率(Top 15)',
         0, lambda v: f"{v:.2f}%", 0.02),
    ]
    for ax, column, xlabel, title, reference, formatter, ratio in ma_panels:
        values = top_ma[column]
        colors = None
        if reference is not None:
            colors = ['green' if v > reference else 'red' for v in values]
        bars = ax.barh(positions, values, color=colors)
        ax.set_yticks(positions)
        ax.set_yticklabels(top_ma.index)
        ax.set_xlabel(xlabel)
        ax.set_title(title)
        if reference is not None:
            ax.axvline(x=reference, color='black', linestyle='--', alpha=0.5)
        ax.invert_yaxis()  # most frequent combination on top
        annotate_barh(ax, bars, formatter=formatter, offset_ratio=ratio)
    save_figure('ma_lines_analysis.png')

    # 2. Performance by time since the session open ('未知' is left out).
    _, axes = plt.subplots(1, 2, figsize=(14, 6))
    segment_order = ['<30分钟', '30-60分钟', '>1小时']
    segments = time_stats.loc[[label for label in segment_order if label in time_stats.index]]
    segment_positions = range(len(segments))
    segment_panels = [
        (axes[0], '平均盈亏', '平均盈亏(元)', '不同时间段平均盈亏',
         lambda v: f"{v:,.0f}"),
        (axes[1], '平均保证金收益率', '平均保证金收益率(%)', '不同时间段平均保证金收益率',
         lambda v: f"{v:.2f}%"),
    ]
    for ax, column, ylabel, title, formatter in segment_panels:
        values = segments[column]
        colors = ['green' if v >= 0 else 'red' for v in values]
        bars = ax.bar(segment_positions, values, color=colors)
        ax.set_xticks(segment_positions)
        ax.set_xticklabels(segments.index)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)
        annotate_bar(ax, bars, formatter=formatter)
    save_figure('time_segment_analysis.png')

    # 3. Heatmaps: MA combination x time segment.
    _, axes = plt.subplots(1, 3, figsize=(22, 10))
    top_ma_lines = top_ma.index
    heatmap_cols = ['<30分钟', '30-60分钟', '>1小时']

    def prepare_heatmap(table):
        # Keep the top MA combinations (in ma_stats order) and, when any of
        # them exist, only the three known time-segment columns.
        filtered = table.reindex(index=[idx for idx in top_ma_lines if idx in table.index])
        if filtered.empty:
            return filtered
        cols = [col for col in heatmap_cols if col in filtered.columns]
        if cols:
            filtered = filtered[cols]
        return filtered

    heatmap_panels = [
        (axes[0], cross_winrate, '胜率', '均线组合 × 时间段 胜率热力图(Top 15)',
         {'fmt': '.2f', 'center': 0.5, 'vmin': 0, 'vmax': 1}),
        (axes[1], cross_profit, '平均盈亏(元)', '均线组合 × 时间段 平均盈亏热力图(Top 15)',
         {'fmt': '.0f', 'center': 0}),
        (axes[2], cross_return, '平均保证金收益率(%)', '均线组合 × 时间段 平均保证金收益率热力图(Top 15)',
         {'fmt': '.2f', 'center': 0}),
    ]
    for ax, table, cbar_label, title, options in heatmap_panels:
        data = prepare_heatmap(table)
        # Skip the heatmap for an empty table and leave the panel blank.
        if not data.empty:
            sns.heatmap(data, annot=True, cmap='RdYlGn', ax=ax,
                        cbar_kws={'label': cbar_label}, **options)
        ax.set_title(title)
        ax.set_xlabel('开盘后时间段')
        ax.set_ylabel('均线组合')
    save_figure('cross_analysis_heatmap.png')

    # 4. Trade type and variety comparison.
    _, axes = plt.subplots(2, 2, figsize=(16, 12))
    trade_type_stats = df.groupby('交易类型').apply(calculate_statistics)
    variety_stats = df.groupby('品种').apply(calculate_statistics)
    # (axis, stats table, column, y label, title, guide line, bar colours)
    comparison_panels = [
        (axes[0, 0], trade_type_stats, '胜率', '胜率', '交易类型胜率对比',
         0.5, ['green', 'red']),
        (axes[0, 1], trade_type_stats, '平均保证金收益率', '平均保证金收益率(%)', '交易类型保证金收益率对比',
         0, ['green', 'red']),
        (axes[1, 0], variety_stats, '胜率', '胜率', '品种类型胜率对比',
         0.5, None),
        (axes[1, 1], variety_stats, '平均保证金收益率', '平均保证金收益率(%)', '品种类型保证金收益率对比',
         0, None),
    ]
    for ax, stats, column, ylabel, title, guide, colors in comparison_panels:
        ax.bar(stats.index, stats[column], color=colors)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.axhline(y=guide, color='black', linestyle='--', alpha=0.5)
    save_figure('variety_analysis.png')

    print("\n所有图表已生成!")
def save_results_to_csv(df, ma_stats, time_stats, output_dir):
    """
    Write the enriched records and the statistics tables as CSV files.

    Files written under `output_dir` (UTF-8 with BOM): records_enhanced.csv,
    ma_lines_stats.csv, time_segment_stats.csv, symbol_stats.csv and
    combo_strategy_stats.csv. The combination table keeps only groups with
    at least 5 samples and is sorted by '平均保证金收益率' descending.

    Args:
        df: Preprocessed trade records.
        ma_stats: Statistics by MA combination.
        time_stats: Statistics by time segment.
        output_dir: Target directory; created when missing.
    """
    print("\n" + "="*80)
    print("保存分析结果到CSV...")
    print("="*80)

    # Do not depend on another step having created the directory.
    os.makedirs(output_dir, exist_ok=True)

    # Enriched raw data (no index column).
    output_file = os.path.join(output_dir, 'records_enhanced.csv')
    df.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"已保存增强数据: {output_file}")

    # Statistics by MA combination.
    output_file = os.path.join(output_dir, 'ma_lines_stats.csv')
    ma_stats.to_csv(output_file, encoding='utf-8-sig')
    print(f"已保存均线组合统计: {output_file}")

    # Statistics by time segment.
    output_file = os.path.join(output_dir, 'time_segment_stats.csv')
    time_stats.to_csv(output_file, encoding='utf-8-sig')
    print(f"已保存时间段统计: {output_file}")

    # Statistics by product code (all codes, unrounded).
    symbol_stats = df.groupby('品种代码').apply(calculate_statistics)
    output_file = os.path.join(output_dir, 'symbol_stats.csv')
    symbol_stats.to_csv(output_file, encoding='utf-8-sig')
    print(f"已保存品种统计: {output_file}")

    # Three-way combination statistics.
    combo_stats = df.groupby(['crossed_ma_lines', '开盘后时间段', '交易类型']).apply(
        calculate_statistics
    )
    combo_stats = combo_stats[combo_stats['出现次数'] >= 5]
    combo_stats = combo_stats.sort_values('平均保证金收益率', ascending=False)
    output_file = os.path.join(output_dir, 'combo_strategy_stats.csv')
    combo_stats.to_csv(output_file, encoding='utf-8-sig')
    print(f"已保存组合策略统计: {output_file}")
def main():
    """
    Run the full analysis: load, analyse, plot and export.

    Reads 'records.csv' from the directory containing this script and
    writes the results to 'data/future/analysis_results', which is resolved
    against the current working directory.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(here, 'records.csv')
    output_dir = os.path.join('data', 'future', 'analysis_results')

    # Nothing to do without the input file.
    if not os.path.exists(csv_path):
        print(f"错误: 找不到文件 {csv_path}")
        return

    rule = "=" * 80
    print(rule)
    print("期货开仓记录分析工具")
    print(rule)

    df = load_and_preprocess_data(csv_path)

    # Each analysis prints its own report. Only the tables needed by the
    # plotting and export steps are kept.
    ma_stats = analyze_ma_lines(df)
    time_stats = analyze_time_segment(df)
    _, cross_winrate, cross_profit, cross_return = analyze_cross_dimension(df)
    analyze_trade_type_and_variety(df)
    analyze_additional_dimensions(df)

    create_visualizations(df, ma_stats, time_stats, cross_winrate, cross_profit, cross_return, output_dir)

    save_results_to_csv(df, ma_stats, time_stats, output_dir)

    print("\n" + rule)
    print("分析完成!")
    print(f"结果保存在: {output_dir}")
    print(rule)


if __name__ == '__main__':
    main()
|