| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- 期货开仓记录分析工具
- 分析 records.csv 中的期货交易数据,提供多维度统计分析
- """
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
- from datetime import datetime, timedelta
- import re
- import os
- import sys
# Configure fonts so Chinese labels can render: matplotlib tries the listed
# sans-serif families in order. 'axes.unicode_minus' is switched off as in the
# original setup (presumably so minus signs still render with CJK fonts).
plt.rcParams.update({
    'font.sans-serif': ['Arial Unicode MS', 'SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
# Futures contract configuration (copied from MAPatternStrategy_v002.py).
#
# Maps a product code to a dict with:
#   'has_night_session'  - bool flag
#   'margin_rate'        - {'long': rate, 'short': rate}
#   'multiplier'         - contract multiplier
#   'trading_start_time' - 'HH:MM' string
# Every row below uses the same margin rate for long and short, so the table
# stores it once and the comprehension expands it into a fresh dict per entry.
FUTURES_CONFIG = {
    code: {
        'has_night_session': night,
        'margin_rate': {'long': margin, 'short': margin},
        'multiplier': multiplier,
        'trading_start_time': start,
    }
    for code, night, margin, multiplier, start in (
        # (code, has_night_session, margin_rate, multiplier, trading_start_time)
        # Precious metals
        ('AU', True, 0.21, 1000, '21:00'),
        ('AG', True, 0.22, 15, '21:00'),
        # Non-ferrous metals
        ('CU', True, 0.15, 5, '21:00'),
        ('AL', True, 0.15, 5, '21:00'),
        ('ZN', True, 0.14, 5, '21:00'),
        ('PB', True, 0.14, 5, '21:00'),
        ('NI', True, 0.16, 1, '21:00'),
        ('SN', True, 0.17, 1, '21:00'),
        ('SS', True, 0.07, 5, '21:00'),
        # Ferrous
        ('RB', True, 0.13, 10, '21:00'),
        ('HC', True, 0.13, 10, '21:00'),
        ('I', True, 0.16, 100, '21:00'),
        ('JM', True, 0.17, 100, '21:00'),
        ('J', True, 0.25, 60, '21:00'),
        # Energy and chemicals
        ('SP', True, 0.13, 10, '21:00'),
        ('FU', True, 0.16, 10, '21:00'),
        ('BU', True, 0.17, 10, '21:00'),
        ('RU', True, 0.15, 10, '21:00'),
        ('BR', True, 0.15, 5, '21:00'),
        ('SC', True, 0.17, 1000, '21:00'),
        ('NR', True, 0.15, 10, '21:00'),
        ('LU', True, 0.17, 10, '21:00'),
        ('LC', False, 0.16, 1, '09:00'),
        # Chemicals
        ('FG', True, 0.16, 20, '21:00'),
        ('TA', True, 0.13, 5, '21:00'),
        ('MA', True, 0.12, 10, '21:00'),
        ('SA', True, 0.14, 20, '21:00'),
        ('L', True, 0.12, 5, '21:00'),
        ('V', True, 0.12, 5, '21:00'),
        ('EG', True, 0.12, 10, '21:00'),
        ('PP', True, 0.12, 5, '21:00'),
        ('EB', True, 0.12, 5, '21:00'),
        ('PG', True, 0.12, 20, '21:00'),
        ('PX', True, 0.1, 5, '21:00'),
        # Agricultural products
        ('RM', True, 0.13, 10, '21:00'),
        ('OI', True, 0.13, 10, '21:00'),
        ('CF', True, 0.13, 5, '21:00'),
        ('SR', True, 0.12, 10, '21:00'),
        ('PF', True, 0.13, 5, '21:00'),
        ('C', True, 0.12, 10, '21:00'),
        ('CS', True, 0.11, 10, '21:00'),
        ('CY', True, 0.11, 5, '21:00'),
        ('A', True, 0.12, 10, '21:00'),
        ('B', True, 0.12, 10, '21:00'),
        ('M', True, 0.12, 10, '21:00'),
        ('Y', True, 0.12, 10, '21:00'),
        ('P', True, 0.13, 10, '21:00'),
        # Remaining products. The original heading called this group
        # "no night session", but several rows below are flagged True.
        ('IF', False, 0.17, 300, '09:30'),
        ('IH', False, 0.15, 300, '09:30'),
        ('IC', False, 0.17, 200, '09:30'),
        ('IM', False, 0.17, 200, '09:30'),
        ('AP', False, 0.16, 10, '09:00'),
        ('CJ', False, 0.14, 5, '09:00'),
        ('PK', False, 0.13, 5, '09:00'),
        ('JD', False, 0.11, 10, '09:00'),
        ('LH', False, 0.13, 16, '09:00'),
        ('T', False, 0.03, 1000000, '09:30'),
        ('PS', False, 0.16, 3, '09:00'),
        ('UR', False, 0.14, 20, '09:00'),
        ('MO', True, 0.17, 100, '21:00'),
        ('HO', False, 0.15, 100, '09:30'),
        ('LG', True, 0.13, 90, '21:00'),
        ('EC', False, 0.23, 50, '09:00'),
        ('OP', False, 0.13, 40, '09:00'),
        ('BC', True, 0.15, 5, '21:00'),
        ('SH', True, 0.13, 30, '21:00'),
        ('TS', False, 0.015, 2000000, '09:30'),
        ('AD', False, 0.13, 10, '09:00'),
        ('PL', False, 0.12, 20, '09:00'),
        ('SI', False, 0.15, 5, '09:00'),
        ('SM', False, 0.15, 5, '09:00'),
        ('AO', True, 0.17, 20, '21:00'),
        ('TL', False, 0.045, 1000000, '09:00'),
        ('SF', False, 0.14, 5, '09:00'),
        ('PR', True, 0.12, 15, '21:00'),
        ('TF', False, 0.022, 1000000, '09:00'),
        ('BZ', False, 0.13, 30, '09:00'),
    )
}
def extract_symbol_code(target_str):
    """Extract the product code from a contract description string.

    The code is the run of uppercase ASCII letters that directly follows an
    opening parenthesis and precedes the contract digits and a dot, e.g.
    ``'原油2002(SC2002.XINE)'`` -> ``'SC'``.

    Args:
        target_str: Contract description. Non-string values (``None`` or the
            float NaN pandas uses for an empty CSV cell) are accepted.

    Returns:
        The product code as a string, or ``None`` when the input is not a
        string or does not contain the expected pattern.
    """
    # re.search raises TypeError on non-strings, which would abort a whole
    # Series.apply() because of a single missing cell.
    if not isinstance(target_str, str):
        return None
    match = re.search(r'\(([A-Z]+)\d+\.', target_str)
    return match.group(1) if match else None
# Keys looked up in the parsed ``details`` column; each becomes a DataFrame
# column during preprocessing.
DETAIL_KEYS = [
    'prev_close',
    'open',
    'MA5',
    'MA10',
    'MA20',
    'MA30',
    'MA60',
    'AVG5',
]
def parse_details(details_str):
    """Split a ``details`` cell into a ``{key: float}`` mapping.

    The cell is read as ``|``-separated ``key:value`` pairs. Pieces without a
    colon are ignored. Keys and values are stripped of surrounding whitespace.
    A value ending in ``%`` is divided by 100. Values that cannot be converted
    to ``float`` are stored as NaN.

    Args:
        details_str: Raw cell value; anything ``pd.isna`` treats as missing
            yields an empty dict. Other values are converted with ``str()``.

    Returns:
        dict mapping each key to a float (or NaN).
    """
    if pd.isna(details_str):
        return {}

    parsed = {}
    for chunk in str(details_str).split('|'):
        # partition() splits on the first colon only, like split(':', 1).
        name, colon, text = chunk.partition(':')
        if not colon:
            continue
        text = text.strip()
        as_fraction = text.endswith('%')
        if as_fraction:
            text = text[:-1]
        try:
            number = float(text)
        except (ValueError, TypeError):
            number = np.nan
        else:
            if as_fraction:
                number /= 100
        parsed[name.strip()] = number
    return parsed
def infer_trade_direction(trade_type):
    """Classify a trade-type label as ``'long'``, ``'short'`` or ``'unknown'``.

    A label counts as long when it contains '多' or (case-insensitively)
    'long'; otherwise as short when it contains '空' or 'short'. The long
    check runs first, so a label matching both is reported as long.
    Non-string input yields ``'unknown'``.
    """
    if not isinstance(trade_type, str):
        return 'unknown'

    lowered = trade_type.lower()
    # Order matters: long markers take precedence over short markers.
    for direction, chinese_marker in (('long', '多'), ('short', '空')):
        if chinese_marker in trade_type or direction in lowered:
            return direction
    return 'unknown'
def evaluate_trend_alignment(row):
    """Judge whether a trade's direction agrees with the MA60 trend.

    Reads '交易类型', 'MA60', '成交价' and 'open' from ``row`` via ``.get``.

    Returns:
        '一致' when, for a long trade, both the fill price and the open are at
        or above MA60 (at or below for a short trade); '不一致' when that does
        not hold; '数据不足' when the direction is unknown or any of the three
        numbers is missing.
    """
    side = infer_trade_direction(row.get('交易类型'))
    ma60 = row.get('MA60')
    fill_price = row.get('成交价')
    day_open = row.get('open')

    if side == 'unknown' or pd.isna(ma60) or pd.isna(fill_price) or pd.isna(day_open):
        return '数据不足'

    if side == 'long':
        aligned = fill_price >= ma60 and day_open >= ma60
    elif side == 'short':
        aligned = fill_price <= ma60 and day_open <= ma60
    else:
        return '数据不足'
    return '一致' if aligned else '不一致'
def calculate_ma_compaction(row, ma_columns):
    """Return how tightly a set of moving averages is clustered.

    The measure is the population standard deviation (``ddof=0``) of the
    selected values divided by their mean.

    Args:
        row: Mapping-like object supporting ``.get`` (e.g. a DataFrame row).
        ma_columns: Names of the moving-average fields to include.

    Returns:
        The std/mean ratio, or NaN when any value is missing or the mean is 0.
    """
    samples = [row.get(name) for name in ma_columns]
    for sample in samples:
        if pd.isna(sample):
            return np.nan

    centre = np.mean(samples)
    if centre == 0:
        return np.nan
    spread = np.std(samples, ddof=0)
    return spread / centre
def calculate_gap_ratio(prev_close, open_price):
    """Return the absolute opening gap as a fraction of the previous close.

    Computes ``|open_price - prev_close| / |prev_close|``. Returns NaN when
    either input is missing or the previous close is zero.
    """
    if pd.isna(prev_close) or prev_close == 0:
        return np.nan
    if pd.isna(open_price):
        return np.nan
    gap = abs(open_price - prev_close)
    return gap / abs(prev_close)
def calculate_relative_gap(prev_close, open_price, avg_5day_change):
    """Return the opening gap relative to ``avg_5day_change``.

    The result is ``calculate_gap_ratio(prev_close, open_price)`` divided by
    ``|avg_5day_change|``. Returns NaN when the gap ratio is NaN or when
    ``avg_5day_change`` is missing or zero.
    """
    gap_ratio = calculate_gap_ratio(prev_close, open_price)
    if pd.isna(gap_ratio):
        return np.nan
    if pd.isna(avg_5day_change) or avg_5day_change == 0:
        return np.nan
    return gap_ratio / abs(avg_5day_change)
def assign_quantile_labels(df, source_col, target_col, q=4):
    """Label a continuous column with quantile buckets ``'Q1'``, ``'Q2'``, ...

    ``df`` is modified in place: ``target_col`` is (re)created, rows whose
    ``source_col`` value is missing stay NaN, and every other row receives the
    label of its quantile bucket. Nothing is returned.

    Args:
        df: DataFrame to annotate.
        source_col: Name of the numeric column to bucket.
        target_col: Name of the label column to write.
        q: Requested number of buckets. At most the number of distinct
            non-missing values is used, and fewer buckets result when tied
            values make several quantile edges coincide.

    When there are fewer than two distinct non-missing values the target
    column is filled with NaN.
    """
    valid = df[source_col].dropna()
    unique_count = valid.nunique()
    if valid.empty or unique_count < 2:
        df[target_col] = np.nan
        return

    bins = min(q, unique_count)
    # Ask for integer bucket codes instead of passing a fixed label list:
    # with duplicates='drop' the number of buckets can shrink, and a label
    # list of the requested length would then make qcut raise ValueError.
    codes = pd.qcut(valid, q=bins, labels=False, duplicates='drop')
    bucket_labels = 'Q' + (codes.astype(int) + 1).astype(str)

    # Object dtype so string labels can be written without a dtype clash.
    df[target_col] = np.nan
    df[target_col] = df[target_col].astype(object)
    df.loc[bucket_labels.index, target_col] = bucket_labels
def calculate_time_segment(order_time_str, trading_start_time_str):
    """Bucket an order by how long after the session start it was placed.

    Args:
        order_time_str: Order time as ``'HH:MM:SS'``.
        trading_start_time_str: Session start as ``'HH:MM'``.

    Returns:
        '<30分钟', '30-60分钟' or '>1小时'. If anything goes wrong (for example
        an unparsable time) the error is printed and '未知' is returned.

    For a 21:00 start, an order placed before 21:00 is treated as belonging
    to the following calendar day. Any remaining negative offset is wrapped
    forward by 24 hours.
    """
    try:
        placed_at = datetime.strptime(order_time_str, '%H:%M:%S')
        session_open = datetime.strptime(trading_start_time_str, '%H:%M')

        # Night-session products: clock times earlier than 21:00 fall on the
        # day after the session opened.
        if trading_start_time_str == '21:00' and placed_at.hour < 21:
            placed_at += timedelta(days=1)

        elapsed = placed_at - session_open
        if elapsed < timedelta(0):
            elapsed += timedelta(days=1)

        if elapsed < timedelta(minutes=30):
            return '<30分钟'
        if elapsed < timedelta(minutes=60):
            return '30-60分钟'
        return '>1小时'
    except Exception as e:
        print(f"时间计算错误: {order_time_str}, {trading_start_time_str}, {e}")
        return '未知'
def calculate_session_type(order_time_str, has_night_session):
    """Classify an order time into a trading session.

    Args:
        order_time_str: Order time as ``'HH:MM:SS'``.
        has_night_session: Truthy when the product trades a night session.

    Returns:
        '夜盘' when the product has a night session and the hour is >= 21 or
        < 3; '上午' for hours 9-11; '下午' for hours 12-15; '其他' for any other
        hour; '未知' when ``order_time_str`` cannot be parsed (wrong format or
        not a string).
    """
    try:
        hour = datetime.strptime(order_time_str, '%H:%M:%S').hour
    except (ValueError, TypeError):
        # strptime raises ValueError for a bad format and TypeError for
        # non-string input. A bare except would also swallow
        # KeyboardInterrupt and SystemExit.
        return '未知'

    if has_night_session and (hour >= 21 or hour < 3):
        return '夜盘'
    if 9 <= hour < 12:
        return '上午'
    if 12 <= hour < 16:
        return '下午'
    return '其他'
def load_and_preprocess_data(csv_path):
    """Load the trade-record CSV and add all derived analysis columns.

    The CSV must contain the columns '标的', '委托时间', '交易盈亏', '保证金',
    'crossed_ma_lines' and '成交额' (they are accessed directly). 'details',
    '成交价' and 'avg_5day_change' are optional.

    Added columns include the DETAIL_KEYS fields parsed from 'details', the
    product code and its session configuration, the time-since-open bucket,
    the session type, margin return (%), number of crossed MA lines, a win
    flag, a turnover bucket, trend alignment, MA compaction ratios with
    quantile labels, and gap ratios with quantile labels.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The enriched DataFrame. Progress information is printed along the way.
    """
    print("正在加载数据...")
    df = pd.read_csv(csv_path)

    print(f"原始数据行数: {len(df)}")
    print(f"数据列: {df.columns.tolist()}")

    # Expand the details column (when present) and make sure every
    # DETAIL_KEYS field exists; parsed values win over pre-existing columns.
    parsed_details = None
    if 'details' in df.columns:
        parsed_details = df['details'].apply(parse_details).apply(pd.Series)
    for key in DETAIL_KEYS:
        if parsed_details is not None and key in parsed_details.columns:
            df[key] = parsed_details[key]
        elif key not in df.columns:
            df[key] = np.nan

    if '成交价' not in df.columns:
        df['成交价'] = np.nan

    # avg_5day_change: keep an existing column but fill its gaps from AVG5;
    # otherwise fall back to AVG5 itself, or NaN when that is absent too.
    avg5_col = 'avg_5day_change'
    has_avg5 = 'AVG5' in df.columns
    if avg5_col in df.columns:
        if has_avg5:
            df[avg5_col] = df[avg5_col].fillna(df['AVG5'])
    elif has_avg5:
        df[avg5_col] = df['AVG5']
    else:
        df[avg5_col] = np.nan

    # Product code and the per-product session configuration.
    df['品种代码'] = df['标的'].apply(extract_symbol_code)

    def _config_lookup(field, default):
        return lambda code: FUTURES_CONFIG.get(code, {}).get(field, default)

    df['trading_start_time'] = df['品种代码'].apply(
        _config_lookup('trading_start_time', None)
    )
    df['has_night_session'] = df['品种代码'].apply(
        _config_lookup('has_night_session', False)
    )

    # Time elapsed since the session start, bucketed.
    def _segment_for(row):
        if pd.notna(row['trading_start_time']):
            return calculate_time_segment(row['委托时间'], row['trading_start_time'])
        return '未知'

    df['开盘后时间段'] = df.apply(_segment_for, axis=1)

    # Session type (night / morning / afternoon).
    def _session_for(row):
        return calculate_session_type(row['委托时间'], row['has_night_session'])

    df['交易时段'] = df.apply(_session_for, axis=1)

    # Return on margin, in percent.
    df['保证金收益率'] = df['交易盈亏'].div(df['保证金']).mul(100)

    # Number of ';'-separated entries in crossed_ma_lines (0 when missing).
    def _count_crossed(cell):
        if pd.notna(cell):
            return len(cell.split(';'))
        return 0

    df['穿越均线数量'] = df['crossed_ma_lines'].apply(_count_crossed)

    # Win flag.
    df['是否盈利'] = df['交易盈亏'].gt(0)

    # Turnover buckets.
    turnover_edges = [0, 100000, 200000, 500000, float('inf')]
    turnover_labels = ['<10万', '10-20万', '20-50万', '>50万']
    df['成交额分组'] = pd.cut(df['成交额'], bins=turnover_edges, labels=turnover_labels)

    # Trend alignment and MA compaction. The ratio columns are created first
    # and the quantile-label columns afterwards, matching the column order
    # downstream code sees.
    df['趋势一致'] = df.apply(evaluate_trend_alignment, axis=1)
    compaction_specs = (
        ('均线聚合度_5_10_20_30', ['MA5', 'MA10', 'MA20', 'MA30']),
        ('均线聚合度_5_10_20', ['MA5', 'MA10', 'MA20']),
    )
    for column_name, ma_names in compaction_specs:
        df[column_name] = df.apply(
            lambda row, names=ma_names: calculate_ma_compaction(row, names), axis=1
        )
    for column_name, _ in compaction_specs:
        assign_quantile_labels(df, column_name, column_name + '_分位')

    # Opening gap, absolute and relative to avg_5day_change.
    def _gap_ratio_for(row):
        return calculate_gap_ratio(row.get('prev_close'), row.get('open'))

    def _relative_gap_for(row):
        return calculate_relative_gap(
            row.get('prev_close'), row.get('open'), row.get('avg_5day_change')
        )

    df['跳空比率'] = df.apply(_gap_ratio_for, axis=1)
    df['跳空相对波动'] = df.apply(_relative_gap_for, axis=1)
    assign_quantile_labels(df, '跳空相对波动', '跳空相对波动分位')

    print(f"预处理后数据行数: {len(df)}")
    extraction_rate = df['品种代码'].notna().sum() / len(df) * 100
    print(f"品种代码提取成功率: {extraction_rate:.2f}%")

    return df
def calculate_statistics(group_df):
    """Summarise a group of trades.

    Args:
        group_df: DataFrame with the columns '交易盈亏' (profit/loss) and
            '保证金收益率' (margin return).

    Returns:
        pd.Series with:
            '出现次数'         - number of rows,
            '胜率'             - share of rows with profit > 0 (0 if empty),
            '平均盈亏'         - mean profit/loss,
            '盈亏比'           - mean profit of winners divided by the absolute
                                 mean of rows with profit <= 0; ``inf`` when
                                 that denominator is not positive,
            '平均保证金收益率' - mean margin return.
    """
    pnl = group_df['交易盈亏']
    n_trades = len(group_df)

    winners_mask = pnl > 0
    n_wins = winners_mask.sum()
    win_rate = n_wins / n_trades if n_trades > 0 else 0

    # Break-even trades (profit == 0) are counted on the losing side.
    gains = pnl[winners_mask]
    non_gains = pnl[pnl <= 0]
    mean_gain = gains.mean() if len(gains) > 0 else 0
    mean_loss = abs(non_gains.mean()) if len(non_gains) > 0 else 0

    if mean_loss > 0:
        payoff_ratio = mean_gain / mean_loss
    else:
        payoff_ratio = np.inf

    return pd.Series({
        '出现次数': n_trades,
        '胜率': win_rate,
        '平均盈亏': pnl.mean(),
        '盈亏比': payoff_ratio,
        '平均保证金收益率': group_df['保证金收益率'].mean(),
    })
def analyze_ma_lines(df):
    """Print and return statistics per ``crossed_ma_lines`` combination.

    Groups ``df`` by 'crossed_ma_lines', applies ``calculate_statistics`` to
    each group, rounds to 4 decimals and sorts by trade count, largest first.

    Returns:
        The resulting statistics DataFrame.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("均线组合分析")
    print(rule)

    per_combo = df.groupby('crossed_ma_lines').apply(calculate_statistics)
    ma_stats = per_combo.round(4).sort_values('出现次数', ascending=False)

    print(ma_stats.to_string())

    return ma_stats
def analyze_time_segment(df):
    """Print and return statistics per time-since-open bucket.

    Groups ``df`` by '开盘后时间段', applies ``calculate_statistics`` and rounds
    to 4 decimals. Rows are arranged in a fixed order; buckets that do not
    occur are omitted, and any bucket outside that order is dropped.

    Returns:
        The resulting statistics DataFrame.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("开盘后时间段分析")
    print(rule)

    time_stats = df.groupby('开盘后时间段').apply(calculate_statistics).round(4)

    # Present the buckets in chronological order rather than sort order.
    preferred_order = ['<30分钟', '30-60分钟', '>1小时', '未知']
    present = [label for label in preferred_order if label in time_stats.index]
    time_stats = time_stats.reindex(present)

    print(time_stats.to_string())

    return time_stats
def analyze_cross_dimension(df):
    """Cross-tabulate MA combination against time-since-open bucket.

    Builds four tables with 'crossed_ma_lines' as rows and '开盘后时间段' as
    columns, printing each one as it is produced:

    * sample counts,
    * win rate (mean of '是否盈利', 4 decimals),
    * mean profit/loss (mean of '交易盈亏', 2 decimals),
    * mean margin return (mean of '保证金收益率', 4 decimals).

    Returns:
        Tuple ``(cross_count, cross_winrate, cross_profit, cross_return)``.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("交叉分析:均线组合 × 开盘后时间段")
    print(rule)

    ma_combo = df['crossed_ma_lines']
    time_bucket = df['开盘后时间段']

    def _mean_table(value_column, decimals):
        # Mean of one value column for every (MA combo, time bucket) cell.
        table = pd.crosstab(
            ma_combo, time_bucket, values=df[value_column], aggfunc='mean'
        )
        return table.round(decimals)

    # Sample counts
    cross_count = pd.crosstab(ma_combo, time_bucket)
    print("\n样本量分布:")
    print(cross_count.to_string())

    # Win rate
    cross_winrate = _mean_table('是否盈利', 4)
    print("\n胜率对比:")
    print(cross_winrate.to_string())

    # Mean profit/loss
    cross_profit = _mean_table('交易盈亏', 2)
    print("\n平均盈亏:")
    print(cross_profit.to_string())

    # Mean margin return
    cross_return = _mean_table('保证金收益率', 4)
    print("\n平均保证金收益率(%):")
    print(cross_return.to_string())

    return cross_count, cross_winrate, cross_profit, cross_return
def analyze_trade_type_and_variety(df):
    """Print and return statistics by trade type, variety and product code.

    Each table groups ``df`` by one column ('交易类型', '品种', '品种代码'),
    applies ``calculate_statistics`` and rounds to 4 decimals. The product
    code table is additionally sorted by trade count (descending) and cut to
    its first 20 rows.

    Returns:
        Tuple ``(trade_type_stats, variety_stats, symbol_stats)``.
    """
    rule = "=" * 80

    def _heading(title):
        print("\n" + rule)
        print(title)
        print(rule)

    def _stats_by(column):
        return df.groupby(column).apply(calculate_statistics).round(4)

    _heading("交易类型分析")
    trade_type_stats = _stats_by('交易类型')
    print(trade_type_stats.to_string())

    _heading("品种类型分析")
    variety_stats = _stats_by('品种')
    print(variety_stats.to_string())

    _heading("具体品种代码分析(前20名)")
    symbol_stats = _stats_by('品种代码')
    symbol_stats = symbol_stats.sort_values('出现次数', ascending=False).head(20)
    print(symbol_stats.to_string())

    return trade_type_stats, variety_stats, symbol_stats
def analyze_additional_dimensions(df):
    """Print and return statistics for several secondary dimensions.

    Sections, in order: turnover bucket ('成交额分组'), session type
    ('交易时段'), number of crossed MA lines ('穿越均线数量'), long/short split
    within the 10 most frequent MA combinations, products with versus without
    a night session ('has_night_session'), and the three-way combination
    MA combination x time bucket x trade type restricted to groups with at
    least 10 trades. Every table comes from ``calculate_statistics`` rounded
    to 4 decimals.

    Args:
        df: Preprocessed trade DataFrame containing the columns named above
            plus 'crossed_ma_lines', '开盘后时间段' and '交易类型'.

    Returns:
        dict with the keys 'amount_stats', 'session_stats', 'ma_count_stats',
        'long_short_stats', 'night_session_stats' and 'combo_stats' (the
        filtered three-way table, not the printed top-10 views).
    """
    rule = "=" * 80

    def _heading(title):
        print("\n" + rule)
        print(title)
        print(rule)

    _heading("成交额分组分析")
    amount_stats = df.groupby('成交额分组').apply(calculate_statistics).round(4)
    print(amount_stats.to_string())

    _heading("交易时段分析")
    session_stats = df.groupby('交易时段').apply(calculate_statistics).round(4)
    print(session_stats.to_string())

    _heading("穿越均线数量分析")
    ma_count_stats = df.groupby('穿越均线数量').apply(calculate_statistics).round(4)
    print(ma_count_stats.to_string())

    _heading("多空对比(按均线组合)- 前10个组合")
    # Restrict to the 10 most frequent MA combinations.
    top_ma_lines = df['crossed_ma_lines'].value_counts().head(10).index
    df_top = df[df['crossed_ma_lines'].isin(top_ma_lines)]
    long_short_stats = df_top.groupby(['crossed_ma_lines', '交易类型']).apply(
        calculate_statistics
    ).round(4)
    print(long_short_stats.to_string())

    _heading("品种特性分析(有夜盘 vs 无夜盘)")
    night_session_stats = df.groupby('has_night_session').apply(
        calculate_statistics
    ).round(4)
    # Derive each label from the group actually present. Assigning a fixed
    # two-item list raises ValueError when the data holds only one group.
    night_labels = {False: '无夜盘', True: '有夜盘'}
    night_session_stats.index = [
        night_labels[bool(flag)] for flag in night_session_stats.index
    ]
    print(night_session_stats.to_string())

    _heading("组合策略分析:最佳组合(样本量>=10)")
    # Three-way combination, keeping only groups with at least 10 trades.
    combo_stats = df.groupby(['crossed_ma_lines', '开盘后时间段', '交易类型']).apply(
        calculate_statistics
    ).round(4)
    combo_stats = combo_stats[combo_stats['出现次数'] >= 10]

    # Top 10 by mean margin return.
    combo_stats_sorted = combo_stats.sort_values(
        '平均保证金收益率', ascending=False
    ).head(10)
    print("\n保证金收益率最高的10个组合:")
    print(combo_stats_sorted.to_string())

    # Top 10 by win rate.
    combo_stats_sorted_winrate = combo_stats.sort_values(
        '胜率', ascending=False
    ).head(10)
    print("\n胜率最高的10个组合:")
    print(combo_stats_sorted_winrate.to_string())

    return {
        'amount_stats': amount_stats,
        'session_stats': session_stats,
        'ma_count_stats': ma_count_stats,
        'long_short_stats': long_short_stats,
        'night_session_stats': night_session_stats,
        'combo_stats': combo_stats,
    }
def analyze_trend_alignment(df):
    """Print and return statistics grouped by the '趋势一致' label.

    Returns:
        The statistics DataFrame (``calculate_statistics`` per label, rounded
        to 4 decimals), or ``None`` when '趋势一致' has no non-missing values.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("趋势一致性分析(基于MA60 vs 成交价/open)")
    print(rule)

    has_labels = not df['趋势一致'].dropna().empty
    if not has_labels:
        print("暂无可用数据")
        return None

    grouped = df.groupby('趋势一致')
    trend_stats = grouped.apply(calculate_statistics).round(4)
    print(trend_stats.to_string())

    return trend_stats
def analyze_ma_compaction(df):
    """Print and return statistics per MA-compaction quantile.

    Two compaction measures are handled (MA5/MA10/MA20/MA30 and
    MA5/MA10/MA20). For each one, descriptive figures of the ratio column are
    printed, followed by ``calculate_statistics`` per quantile label rounded
    to 4 decimals.

    Returns:
        dict with the keys 'ma_compaction_ma5_ma30' and
        'ma_compaction_ma5_ma20'. A value is ``None`` when the ratio column
        or its quantile-label column is absent or entirely missing.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("均线聚合度分析(标准差/均值)")
    print(rule)

    # (ratio column, display label, quantile-label column, result key)
    measures = (
        ('均线聚合度_5_10_20_30', 'MA5/MA10/MA20/MA30',
         '均线聚合度_5_10_20_30_分位', 'ma_compaction_ma5_ma30'),
        ('均线聚合度_5_10_20', 'MA5/MA10/MA20',
         '均线聚合度_5_10_20_分位', 'ma_compaction_ma5_ma20'),
    )

    compaction_results = {}
    for col, label, quantile_col, result_key in measures:
        print(f"\n--- {label} ---")
        # Default to "no result"; overwritten only when statistics exist.
        compaction_results[result_key] = None

        if col not in df.columns or df[col].dropna().empty:
            print("数据不足,无法分析。")
            continue

        ratio = df[col]
        print(f"{label} 描述统计:均值={ratio.mean():.4f}, 中位数={ratio.median():.4f}, 最大值={ratio.max():.4f}")

        if quantile_col not in df.columns or df[quantile_col].dropna().empty:
            print("分位标签缺失,跳过统计。")
            continue

        stats = df.groupby(quantile_col).apply(calculate_statistics).round(4)
        print(stats.to_string())
        compaction_results[result_key] = stats

    return compaction_results
def analyze_gap_behavior(df):
    """Print opening-gap figures and return statistics per gap quantile.

    Prints the mean and maximum of '跳空比率' and, when available, of
    '跳空相对波动'. It then groups by '跳空相对波动分位' and applies
    ``calculate_statistics`` (rounded to 4 decimals).

    Returns:
        The per-quantile statistics DataFrame, or ``None`` when the gap ratio
        column or the quantile-label column is absent or entirely missing.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("跳空行为分析")
    print(rule)

    def _usable(column):
        # A column is usable when it exists and has a non-missing value.
        return column in df.columns and not df[column].dropna().empty

    if not _usable('跳空比率'):
        print("缺少跳空数据,无法分析。")
        return None

    gap = df['跳空比率']
    print(f"跳空比率描述:均值={gap.mean():.4f}, 最大值={gap.max():.4f}")
    if _usable('跳空相对波动'):
        relative = df['跳空相对波动']
        print(f"跳空相对波动描述:均值={relative.mean():.4f}, 最大值={relative.max():.4f}")

    if not _usable('跳空相对波动分位'):
        print("跳空相对波动分位标签缺失,跳过分组统计。")
        return None

    gap_stats = df.groupby('跳空相对波动分位').apply(calculate_statistics).round(4)
    print("\n按跳空相对波动分位的表现:")
    print(gap_stats.to_string())

    return gap_stats
def analyze_enhanced_cross_metrics(df):
    """Cross the derived indicators with the two core dimensions.

    Each derived indicator (trend alignment, both MA-compaction quantiles and
    the relative-gap quantile) is paired with 'crossed_ma_lines' and with
    '开盘后时间段'. For every pair, ``calculate_statistics`` is applied per
    group and rounded to 4 decimals.

    Returns:
        dict keyed by a short identifier per pair (e.g. 'trend_vs_ma'). A
        value is ``None`` when a required column is absent or the indicator
        column has no non-missing values.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("扩展指标交叉分析(趋势一致/均线聚合度/跳空 vs 核心维度)")
    print(rule)

    # (grouping columns [core dimension, indicator], title, result key)
    pairings = [
        (['crossed_ma_lines', '趋势一致'], '趋势一致 × 均线组合', 'trend_vs_ma'),
        (['开盘后时间段', '趋势一致'], '趋势一致 × 开盘后时间段', 'trend_vs_time'),
        (['crossed_ma_lines', '均线聚合度_5_10_20_30_分位'], '均线聚合度(4) × 均线组合', 'ma_compact4_vs_ma'),
        (['开盘后时间段', '均线聚合度_5_10_20_30_分位'], '均线聚合度(4) × 开盘后时间段', 'ma_compact4_vs_time'),
        (['crossed_ma_lines', '均线聚合度_5_10_20_分位'], '均线聚合度(3) × 均线组合', 'ma_compact3_vs_ma'),
        (['开盘后时间段', '均线聚合度_5_10_20_分位'], '均线聚合度(3) × 开盘后时间段', 'ma_compact3_vs_time'),
        (['crossed_ma_lines', '跳空相对波动分位'], '跳空相对波动 × 均线组合', 'gap_vs_ma'),
        (['开盘后时间段', '跳空相对波动分位'], '跳空相对波动 × 开盘后时间段', 'gap_vs_time'),
    ]

    results = {}
    for group_cols, title, key in pairings:
        stats = None
        absent = [name for name in group_cols if name not in df.columns]
        if absent:
            print(f"\n{title}: 缺少列 {absent},跳过。")
        elif df[group_cols[1]].dropna().empty:
            print(f"\n{title}: 数据不足,跳过。")
        else:
            stats = df.groupby(group_cols).apply(calculate_statistics).round(4)
            print(f"\n{title}")
            print(stats.to_string())
        results[key] = stats
    return results
- def create_visualizations(df, ma_stats, time_stats, cross_winrate, cross_profit, cross_return, output_dir):
- """
- 创建数据可视化图表
- """
- print("\n" + "="*80)
- print("生成可视化图表...")
- print("="*80)
-
- # 创建输出目录
- os.makedirs(output_dir, exist_ok=True)
-
- def annotate_barh(ax, bars, formatter=lambda v: f"{v:.0f}", offset_ratio=0.01):
- """
- 为水平柱状图添加数值标注
- """
- if bars is None or len(bars) == 0:
- return
- max_width = max((bar.get_width() for bar in bars), default=0)
- offset = max(max_width * offset_ratio, 0.5)
- for bar in bars:
- width = bar.get_width()
- if np.isnan(width):
- continue
- ha = 'left'
- x = width + offset
- if width < 0:
- ha = 'right'
- x = width - offset
- y = bar.get_y() + bar.get_height() / 2
- ax.text(x, y, formatter(width), va='center', ha=ha, fontsize=9)
-
- def annotate_bar(ax, bars, formatter=lambda v: f"{v:.0f}", offset_ratio=0.01):
- """
- 为垂直柱状图添加数值标注
- """
- if bars is None or len(bars) == 0:
- return
- max_height = max((bar.get_height() for bar in bars), default=0)
- offset = max(max_height * offset_ratio, 0.5)
- for bar in bars:
- height = bar.get_height()
- if np.isnan(height):
- continue
- va = 'bottom'
- y = height + offset
- if height < 0:
- va = 'top'
- y = height - offset
- x = bar.get_x() + bar.get_width() / 2
- ax.text(x, y, formatter(height), va=va, ha='center', fontsize=9)
-
- # 1. 均线组合表现对比(前15个)
- fig, axes = plt.subplots(2, 2, figsize=(16, 12))
-
- top_ma = ma_stats.head(15)
-
- # 出现次数
- bars = axes[0, 0].barh(range(len(top_ma)), top_ma['出现次数'])
- axes[0, 0].set_yticks(range(len(top_ma)))
- axes[0, 0].set_yticklabels(top_ma.index)
- axes[0, 0].set_xlabel('出现次数')
- axes[0, 0].set_title('均线组合出现次数(Top 15)')
- axes[0, 0].invert_yaxis()
- annotate_barh(axes[0, 0], bars)
-
- # 胜率
- colors = ['green' if x > 0.5 else 'red' for x in top_ma['胜率']]
- bars = axes[0, 1].barh(range(len(top_ma)), top_ma['胜率'], color=colors)
- axes[0, 1].set_yticks(range(len(top_ma)))
- axes[0, 1].set_yticklabels(top_ma.index)
- axes[0, 1].set_xlabel('胜率')
- axes[0, 1].set_title('均线组合胜率(Top 15)')
- axes[0, 1].axvline(x=0.5, color='black', linestyle='--', alpha=0.5)
- axes[0, 1].invert_yaxis()
- annotate_barh(axes[0, 1], bars, formatter=lambda v: f"{v:.1%}", offset_ratio=0.02)
-
- # 平均盈亏
- colors = ['green' if x > 0 else 'red' for x in top_ma['平均盈亏']]
- bars = axes[1, 0].barh(range(len(top_ma)), top_ma['平均盈亏'], color=colors)
- axes[1, 0].set_yticks(range(len(top_ma)))
- axes[1, 0].set_yticklabels(top_ma.index)
- axes[1, 0].set_xlabel('平均盈亏(元)')
- axes[1, 0].set_title('均线组合平均盈亏(Top 15)')
- axes[1, 0].axvline(x=0, color='black', linestyle='--', alpha=0.5)
- axes[1, 0].invert_yaxis()
- annotate_barh(axes[1, 0], bars, formatter=lambda v: f"{v:,.0f}", offset_ratio=0.015)
-
- # 保证金收益率
- colors = ['green' if x > 0 else 'red' for x in top_ma['平均保证金收益率']]
- bars = axes[1, 1].barh(range(len(top_ma)), top_ma['平均保证金收益率'], color=colors)
- axes[1, 1].set_yticks(range(len(top_ma)))
- axes[1, 1].set_yticklabels(top_ma.index)
- axes[1, 1].set_xlabel('平均保证金收益率(%)')
- axes[1, 1].set_title('均线组合平均保证金收益率(Top 15)')
- axes[1, 1].axvline(x=0, color='black', linestyle='--', alpha=0.5)
- axes[1, 1].invert_yaxis()
- annotate_barh(axes[1, 1], bars, formatter=lambda v: f"{v:.2f}%", offset_ratio=0.02)
-
- plt.tight_layout()
- plt.savefig(os.path.join(output_dir, 'ma_lines_analysis.png'), dpi=150, bbox_inches='tight')
- print(f"已保存: {os.path.join(output_dir, 'ma_lines_analysis.png')}")
- plt.close()
-
- # 2. 开盘后时间段表现
- fig, axes = plt.subplots(1, 2, figsize=(14, 6))
- order = ['<30分钟', '30-60分钟', '>1小时']
- time_stats_filtered = time_stats[time_stats.index.isin(order)]
- time_stats_filtered = time_stats_filtered.loc[[idx for idx in order if idx in time_stats_filtered.index]]
-
- profit_colors = ['green' if val >= 0 else 'red' for val in time_stats_filtered['平均盈亏']]
- bars = axes[0].bar(range(len(time_stats_filtered)), time_stats_filtered['平均盈亏'], color=profit_colors)
- axes[0].set_xticks(range(len(time_stats_filtered)))
- axes[0].set_xticklabels(time_stats_filtered.index)
- axes[0].set_ylabel('平均盈亏(元)')
- axes[0].set_title('不同时间段平均盈亏')
- axes[0].axhline(y=0, color='black', linestyle='--', alpha=0.5)
- annotate_bar(axes[0], bars, formatter=lambda v: f"{v:,.0f}")
-
- margin_colors = ['green' if val >= 0 else 'red' for val in time_stats_filtered['平均保证金收益率']]
- bars = axes[1].bar(range(len(time_stats_filtered)), time_stats_filtered['平均保证金收益率'], color=margin_colors)
- axes[1].set_xticks(range(len(time_stats_filtered)))
- axes[1].set_xticklabels(time_stats_filtered.index)
- axes[1].set_ylabel('平均保证金收益率(%)')
- axes[1].set_title('不同时间段平均保证金收益率')
- axes[1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
- annotate_bar(axes[1], bars, formatter=lambda v: f"{v:.2f}%")
-
- plt.tight_layout()
- plt.savefig(os.path.join(output_dir, 'time_segment_analysis.png'), dpi=150, bbox_inches='tight')
- print(f"已保存: {os.path.join(output_dir, 'time_segment_analysis.png')}")
- plt.close()
-
- # 3. 交叉分析热力图
- fig, axes = plt.subplots(1, 3, figsize=(22, 10))
-
- # 选择前15个均线组合
- top_ma_lines = ma_stats.head(15).index
- heatmap_cols = ['<30分钟', '30-60分钟', '>1小时']
-
def prepare_heatmap(table):
    """
    Restrict a cross table to the rows and columns shown in the heatmaps.

    Rows are taken from ``top_ma_lines`` (in that order) and columns from
    ``heatmap_cols`` (in that order); both names come from the enclosing
    scope. Labels missing from ``table`` are skipped. When none of the wanted
    columns exist, the row-filtered table is returned with all its columns.
    """
    known_rows = [label for label in top_ma_lines if label in table.index]
    subset = table.reindex(index=known_rows)
    if subset.empty:
        return subset

    wanted_cols = [name for name in heatmap_cols if name in subset.columns]
    return subset[wanted_cols] if wanted_cols else subset
-
- cross_winrate_filtered = prepare_heatmap(cross_winrate)
- cross_profit_filtered = prepare_heatmap(cross_profit)
- cross_return_filtered = prepare_heatmap(cross_return)
-
- # 胜率热力图
- sns.heatmap(cross_winrate_filtered, annot=True, fmt='.2f', cmap='RdYlGn',
- center=0.5, vmin=0, vmax=1, ax=axes[0], cbar_kws={'label': '胜率'})
- axes[0].set_title('均线组合 × 时间段 胜率热力图(Top 15)')
- axes[0].set_xlabel('开盘后时间段')
- axes[0].set_ylabel('均线组合')
-
- # 平均盈亏热力图
- sns.heatmap(cross_profit_filtered, annot=True, fmt='.0f', cmap='RdYlGn', center=0,
- ax=axes[1], cbar_kws={'label': '平均盈亏(元)'})
- axes[1].set_title('均线组合 × 时间段 平均盈亏热力图(Top 15)')
- axes[1].set_xlabel('开盘后时间段')
- axes[1].set_ylabel('均线组合')
-
- # 平均保证金收益率热力图
- sns.heatmap(cross_return_filtered, annot=True, fmt='.2f', cmap='RdYlGn', center=0,
- ax=axes[2], cbar_kws={'label': '平均保证金收益率(%)'})
- axes[2].set_title('均线组合 × 时间段 平均保证金收益率热力图(Top 15)')
- axes[2].set_xlabel('开盘后时间段')
- axes[2].set_ylabel('均线组合')
-
- plt.tight_layout()
- plt.savefig(os.path.join(output_dir, 'cross_analysis_heatmap.png'), dpi=150, bbox_inches='tight')
- print(f"已保存: {os.path.join(output_dir, 'cross_analysis_heatmap.png')}")
- plt.close()
-
- # 4. 品种表现分析
- fig, axes = plt.subplots(2, 2, figsize=(16, 12))
-
- # 交易类型对比
- trade_type_stats = df.groupby('交易类型').apply(calculate_statistics)
- axes[0, 0].bar(trade_type_stats.index, trade_type_stats['胜率'],
- color=['green', 'red'])
- axes[0, 0].set_ylabel('胜率')
- axes[0, 0].set_title('交易类型胜率对比')
- axes[0, 0].axhline(y=0.5, color='black', linestyle='--', alpha=0.5)
-
- axes[0, 1].bar(trade_type_stats.index, trade_type_stats['平均保证金收益率'],
- color=['green', 'red'])
- axes[0, 1].set_ylabel('平均保证金收益率(%)')
- axes[0, 1].set_title('交易类型保证金收益率对比')
- axes[0, 1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
-
- # 品种类型对比
- variety_stats = df.groupby('品种').apply(calculate_statistics)
- axes[1, 0].bar(variety_stats.index, variety_stats['胜率'])
- axes[1, 0].set_ylabel('胜率')
- axes[1, 0].set_title('品种类型胜率对比')
- axes[1, 0].axhline(y=0.5, color='black', linestyle='--', alpha=0.5)
-
- axes[1, 1].bar(variety_stats.index, variety_stats['平均保证金收益率'])
- axes[1, 1].set_ylabel('平均保证金收益率(%)')
- axes[1, 1].set_title('品种类型保证金收益率对比')
- axes[1, 1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
-
- plt.tight_layout()
- plt.savefig(os.path.join(output_dir, 'variety_analysis.png'), dpi=150, bbox_inches='tight')
- print(f"已保存: {os.path.join(output_dir, 'variety_analysis.png')}")
- plt.close()
- # 5. 扩展指标热力图
def build_metric_pivot(source_df, row_field, col_field, value_field, row_order=None, col_order=None):
    """
    Build a mean-aggregated pivot of ``value_field`` by row and column field.

    Args:
        source_df: DataFrame holding the three fields.
        row_field: Column whose values become the pivot index.
        col_field: Column whose values become the pivot columns; records
            with a missing value here are dropped first.
        value_field: Column that is averaged in each cell.
        row_order: Optional list of row labels. Only these rows are kept,
            in this order; labels absent from the data are skipped.
        col_order: Optional list of column labels. Only these columns are
            kept, in this order; labels absent from the data are skipped.

    Returns:
        The pivoted DataFrame.
    """
    records = source_df.dropna(subset=[col_field])
    restrict_rows = row_order is not None

    if restrict_rows:
        keep_mask = records[row_field].isin(row_order)
        records = records[keep_mask]

    table = pd.pivot_table(
        records,
        index=row_field,
        columns=col_field,
        values=value_field,
        aggfunc='mean'
    )

    if restrict_rows:
        present_rows = [label for label in row_order if label in table.index]
        table = table.reindex(present_rows)

    if col_order is not None:
        present_cols = [label for label in col_order if label in table.columns]
        table = table[present_cols]

    return table
def plot_heatmap(ax, data, title, fmt='.2f', center=None, vmin=None, vmax=None, cbar_label=''):
    """
    Draw an annotated red-yellow-green heatmap of ``data`` on ``ax``.

    Args:
        ax: Target axes.
        data: DataFrame to render. When it is empty the axes are switched
            off and only a "not enough data" title is shown.
        title: Title for the axes.
        fmt: Format spec for the cell annotations.
        center, vmin, vmax: Colour-scale settings forwarded to seaborn.
        cbar_label: Label for the colour bar.

    The axis labels are taken from the names of the column and row indexes,
    falling back to an empty string when a name is not set.
    """
    if data.empty:
        # Nothing to draw: hide the frame but keep a title explaining why.
        ax.axis('off')
        ax.set_title(f"{title}(数据不足)")
        return

    heatmap_options = dict(
        annot=True,
        fmt=fmt,
        cmap='RdYlGn',
        center=center,
        vmin=vmin,
        vmax=vmax,
        ax=ax,
        cbar_kws={'label': cbar_label},
    )
    sns.heatmap(data, **heatmap_options)

    ax.set_title(title)
    x_caption = data.columns.name or ''
    y_caption = data.index.name or ''
    ax.set_xlabel(x_caption)
    ax.set_ylabel(y_caption)
- enhanced_metric_configs = [
- ('趋势一致', 'trend_alignment_cross.png', '趋势一致', ['一致', '不一致', '数据不足']),
- ('均线聚合度_5_10_20_30_分位', 'ma_compaction_4lines_cross.png', '均线聚合度(MA5/MA10/MA20/MA30)', None),
- ('均线聚合度_5_10_20_分位', 'ma_compaction_3lines_cross.png', '均线聚合度(MA5/MA10/MA20)', None),
- ('跳空相对波动分位', 'gap_behavior_cross.png', '跳空相对波动', None),
- ]
- row_configs = [
- ('crossed_ma_lines', 'Top 15 均线组合', list(ma_stats.head(15).index)),
- ('开盘后时间段', '开盘后时间段', ['<30分钟', '30-60分钟', '>1小时', '未知']),
- ]
- value_configs = [
- ('是否盈利', '胜率', '.2f', 0.5, 0, 1, '胜率'),
- ('交易盈亏', '平均盈亏', '.0f', 0, None, None, '平均盈亏(元)'),
- ('保证金收益率', '平均保证金收益率', '.2f', 0, None, None, '平均保证金收益率(%)'),
- ]
- for metric_field, filename, metric_title, col_order in enhanced_metric_configs:
- if metric_field not in df.columns or df[metric_field].dropna().empty:
- continue
- fig, axes = plt.subplots(len(row_configs), len(value_configs), figsize=(22, 12))
- for row_idx, (row_field, row_label, row_order) in enumerate(row_configs):
- for col_idx, (value_field, value_label, fmt, center, vmin, vmax, cbar_label) in enumerate(value_configs):
- ax = axes[row_idx, col_idx]
- pivot = build_metric_pivot(df, row_field, metric_field, value_field, row_order=row_order, col_order=col_order)
- ax.set_title(f"{row_label} - {value_label}")
- plot_heatmap(
- ax,
- pivot,
- f"{row_label} - {value_label}",
- fmt=fmt,
- center=center,
- vmin=vmin,
- vmax=vmax,
- cbar_label=cbar_label
- )
- ax.set_xlabel(metric_title)
- ax.set_ylabel(row_label)
- plt.suptitle(f"{metric_title} × 核心维度表现", fontsize=16)
- plt.tight_layout(rect=[0, 0, 1, 0.97])
- output_path = os.path.join(output_dir, filename)
- plt.savefig(output_path, dpi=150, bbox_inches='tight')
- print(f"已保存: {output_path}")
- plt.close()
-
- print("\n所有图表已生成!")
def save_results_to_csv(df, ma_stats, time_stats, output_dir,
                        trend_alignment_stats=None, ma_compaction_stats=None,
                        gap_stats=None, enhanced_cross_stats=None):
    """
    Write the enriched records and all statistics tables to CSV files.

    Every file is written into ``output_dir`` with the ``utf-8-sig``
    encoding, and a confirmation line is printed after each file.

    Args:
        df: Enriched trade records. Written without the index, and also
            grouped here to derive the per-symbol and combo-strategy tables
            via ``calculate_statistics`` (defined elsewhere in this file).
        ma_stats: Moving-average combination statistics table.
        time_stats: Time-segment statistics table.
        output_dir: Destination directory; created when it does not exist.
        trend_alignment_stats: Optional table; skipped when ``None``.
        ma_compaction_stats: Optional mapping of key -> table; each
            non-``None`` table is written to ``<key>_stats.csv``.
        gap_stats: Optional table; skipped when ``None``.
        enhanced_cross_stats: Optional mapping of key -> table; each
            non-``None`` table is written to ``<key>_stats.csv``.
    """
    print("\n" + "="*80)
    print("保存分析结果到CSV...")
    print("="*80)

    # to_csv does not create missing directories, so make sure the target
    # exists. An empty path means "current directory" and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    def _export(frame, filename, message, **to_csv_options):
        # Write one table and report where it went.
        output_file = os.path.join(output_dir, filename)
        frame.to_csv(output_file, encoding='utf-8-sig', **to_csv_options)
        print(f"{message}: {output_file}")

    # Enriched raw data and the precomputed tables.
    _export(df, 'records_enhanced.csv', "已保存增强数据", index=False)
    _export(ma_stats, 'ma_lines_stats.csv', "已保存均线组合统计")
    _export(time_stats, 'time_segment_stats.csv', "已保存时间段统计")

    # Per-symbol statistics.
    symbol_stats = df.groupby('品种代码').apply(calculate_statistics)
    _export(symbol_stats, 'symbol_stats.csv', "已保存品种统计")

    # Combo strategies: keep groups with at least 5 occurrences, sorted by
    # average margin return in descending order.
    combo_stats = df.groupby(['crossed_ma_lines', '开盘后时间段', '交易类型']).apply(
        calculate_statistics
    )
    combo_stats = combo_stats[combo_stats['出现次数'] >= 5]
    combo_stats = combo_stats.sort_values('平均保证金收益率', ascending=False)
    _export(combo_stats, 'combo_strategy_stats.csv', "已保存组合策略统计")

    if trend_alignment_stats is not None:
        _export(trend_alignment_stats, 'trend_alignment_stats.csv', "已保存趋势一致性统计")

    if ma_compaction_stats:
        for key, stats in ma_compaction_stats.items():
            if stats is not None:
                _export(stats, f'{key}_stats.csv', "已保存均线聚合度统计")

    if gap_stats is not None:
        _export(gap_stats, 'gap_behavior_stats.csv', "已保存跳空行为统计")

    if enhanced_cross_stats:
        for key, stats in enhanced_cross_stats.items():
            if stats is not None:
                _export(stats, f'{key}_stats.csv', f"已保存 {key} 交叉统计")
def main():
    """
    Run the whole pipeline: load records, analyse, plot, export.

    The input ``records.csv`` is looked up next to this script, while the
    results go to ``data/future/analysis_results`` relative to the current
    working directory. When the input file is missing an error is printed
    and the function returns without doing anything else.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    records_path = os.path.join(base_dir, 'records.csv')
    output_dir = os.path.join('data', 'future', 'analysis_results')
    rule = "=" * 80

    # Guard clause: nothing can be analysed without the input file.
    if not os.path.exists(records_path):
        print(f"错误: 找不到文件 {records_path}")
        return

    print(rule)
    print("期货开仓记录分析工具")
    print(rule)

    # Load and preprocess the data.
    df = load_and_preprocess_data(records_path)

    # Run the analyses in a fixed order. Several results are not consumed
    # below; those calls are kept and their results bound to throwaway names.
    ma_stats = analyze_ma_lines(df)
    time_stats = analyze_time_segment(df)
    _count, cross_winrate, cross_profit, cross_return = analyze_cross_dimension(df)
    _trade_types, _varieties, _symbols = analyze_trade_type_and_variety(df)
    analyze_additional_dimensions(df)

    # Dict literals evaluate in source order, so the call order is unchanged.
    optional_tables = {
        'trend_alignment_stats': analyze_trend_alignment(df),
        'ma_compaction_stats': analyze_ma_compaction(df),
        'gap_stats': analyze_gap_behavior(df),
        'enhanced_cross_stats': analyze_enhanced_cross_metrics(df),
    }

    # Charts first, then the CSV exports.
    create_visualizations(df, ma_stats, time_stats, cross_winrate, cross_profit, cross_return, output_dir)
    save_results_to_csv(df, ma_stats, time_stats, output_dir, **optional_tables)

    print("\n" + rule)
    print("分析完成!")
    print(f"结果保存在: {output_dir}")
    print(rule)


# Script entry point: run the pipeline only when executed directly.
if __name__ == '__main__':
    main()
|