records_analysis.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. 期货开仓记录分析工具
  5. 分析 records.csv 中的期货交易数据,提供多维度统计分析
  6. """
  7. import pandas as pd
  8. import numpy as np
  9. import matplotlib.pyplot as plt
  10. import seaborn as sns
  11. from datetime import datetime, timedelta
  12. import re
  13. import os
  14. import sys
  15. # 设置中文字体
  16. plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
  17. plt.rcParams['axes.unicode_minus'] = False
  18. # 期货配置字典(从MAPatternStrategy_v002.py复制)
  19. FUTURES_CONFIG = {
  20. # 贵金属
  21. 'AU': {'has_night_session': True, 'margin_rate': {'long': 0.21, 'short': 0.21}, 'multiplier': 1000, 'trading_start_time': '21:00'},
  22. 'AG': {'has_night_session': True, 'margin_rate': {'long': 0.22, 'short': 0.22}, 'multiplier': 15, 'trading_start_time': '21:00'},
  23. # 有色金属
  24. 'CU': {'has_night_session': True, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 5, 'trading_start_time': '21:00'},
  25. 'AL': {'has_night_session': True, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 5, 'trading_start_time': '21:00'},
  26. 'ZN': {'has_night_session': True, 'margin_rate': {'long': 0.14, 'short': 0.14}, 'multiplier': 5, 'trading_start_time': '21:00'},
  27. 'PB': {'has_night_session': True, 'margin_rate': {'long': 0.14, 'short': 0.14}, 'multiplier': 5, 'trading_start_time': '21:00'},
  28. 'NI': {'has_night_session': True, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 1, 'trading_start_time': '21:00'},
  29. 'SN': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 1, 'trading_start_time': '21:00'},
  30. 'SS': {'has_night_session': True, 'margin_rate': {'long': 0.07, 'short': 0.07}, 'multiplier': 5, 'trading_start_time': '21:00'},
  31. # 黑色系
  32. 'RB': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '21:00'},
  33. 'HC': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '21:00'},
  34. 'I': {'has_night_session': True, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 100, 'trading_start_time': '21:00'},
  35. 'JM': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 100, 'trading_start_time': '21:00'},
  36. 'J': {'has_night_session': True, 'margin_rate': {'long': 0.25, 'short': 0.25}, 'multiplier': 60, 'trading_start_time': '21:00'},
  37. # 能源化工
  38. 'SP': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '21:00'},
  39. 'FU': {'has_night_session': True, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 10, 'trading_start_time': '21:00'},
  40. 'BU': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 10, 'trading_start_time': '21:00'},
  41. 'RU': {'has_night_session': True, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 10, 'trading_start_time': '21:00'},
  42. 'BR': {'has_night_session': True, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 5, 'trading_start_time': '21:00'},
  43. 'SC': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 1000, 'trading_start_time': '21:00'},
  44. 'NR': {'has_night_session': True, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 10, 'trading_start_time': '21:00'},
  45. 'LU': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 10, 'trading_start_time': '21:00'},
  46. 'LC': {'has_night_session': False, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 1, 'trading_start_time': '09:00'},
  47. # 化工
  48. 'FG': {'has_night_session': True, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 20, 'trading_start_time': '21:00'},
  49. 'TA': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 5, 'trading_start_time': '21:00'},
  50. 'MA': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  51. 'SA': {'has_night_session': True, 'margin_rate': {'long': 0.14, 'short': 0.14}, 'multiplier': 20, 'trading_start_time': '21:00'},
  52. 'L': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 5, 'trading_start_time': '21:00'},
  53. 'V': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 5, 'trading_start_time': '21:00'},
  54. 'EG': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  55. 'PP': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 5, 'trading_start_time': '21:00'},
  56. 'EB': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 5, 'trading_start_time': '21:00'},
  57. 'PG': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 20, 'trading_start_time': '21:00'},
  58. 'PX': {'has_night_session': True, 'margin_rate': {'long': 0.1, 'short': 0.1}, 'multiplier': 5, 'trading_start_time': '21:00'},
  59. # 农产品
  60. 'RM': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '21:00'},
  61. 'OI': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '21:00'},
  62. 'CF': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 5, 'trading_start_time': '21:00'},
  63. 'SR': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  64. 'PF': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 5, 'trading_start_time': '21:00'},
  65. 'C': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  66. 'CS': {'has_night_session': True, 'margin_rate': {'long': 0.11, 'short': 0.11}, 'multiplier': 10, 'trading_start_time': '21:00'},
  67. 'CY': {'has_night_session': True, 'margin_rate': {'long': 0.11, 'short': 0.11}, 'multiplier': 5, 'trading_start_time': '21:00'},
  68. 'A': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  69. 'B': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  70. 'M': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  71. 'Y': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 10, 'trading_start_time': '21:00'},
  72. 'P': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '21:00'},
  73. # 无夜盘品种
  74. 'IF': {'has_night_session': False, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 300, 'trading_start_time': '09:30'},
  75. 'IH': {'has_night_session': False, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 300, 'trading_start_time': '09:30'},
  76. 'IC': {'has_night_session': False, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 200, 'trading_start_time': '09:30'},
  77. 'IM': {'has_night_session': False, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 200, 'trading_start_time': '09:30'},
  78. 'AP': {'has_night_session': False, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 10, 'trading_start_time': '09:00'},
  79. 'CJ': {'has_night_session': False, 'margin_rate': {'long': 0.14, 'short': 0.14}, 'multiplier': 5, 'trading_start_time': '09:00'},
  80. 'PK': {'has_night_session': False, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 5, 'trading_start_time': '09:00'},
  81. 'JD': {'has_night_session': False, 'margin_rate': {'long': 0.11, 'short': 0.11}, 'multiplier': 10, 'trading_start_time': '09:00'},
  82. 'LH': {'has_night_session': False, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 16, 'trading_start_time': '09:00'},
  83. 'T': {'has_night_session': False, 'margin_rate': {'long': 0.03, 'short': 0.03}, 'multiplier': 1000000, 'trading_start_time': '09:30'},
  84. 'PS': {'has_night_session': False, 'margin_rate': {'long': 0.16, 'short': 0.16}, 'multiplier': 3, 'trading_start_time': '09:00'},
  85. 'UR': {'has_night_session': False, 'margin_rate': {'long': 0.14, 'short': 0.14}, 'multiplier': 20, 'trading_start_time': '09:00'},
  86. 'MO': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 100, 'trading_start_time': '21:00'},
  87. 'HO': {'has_night_session': False, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 100, 'trading_start_time': '09:30'},
  88. 'LG': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 90, 'trading_start_time': '21:00'},
  89. 'EC': {'has_night_session': False, 'margin_rate': {'long': 0.23, 'short': 0.23}, 'multiplier': 50, 'trading_start_time': '09:00'},
  90. 'OP': {'has_night_session': False, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 40, 'trading_start_time': '09:00'},
  91. 'BC': {'has_night_session': True, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 5, 'trading_start_time': '21:00'},
  92. 'SH': {'has_night_session': True, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 30, 'trading_start_time': '21:00'},
  93. 'TS': {'has_night_session': False, 'margin_rate': {'long': 0.015, 'short': 0.015}, 'multiplier': 2000000, 'trading_start_time': '09:30'},
  94. 'AD': {'has_night_session': False, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 10, 'trading_start_time': '09:00'},
  95. 'PL': {'has_night_session': False, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 20, 'trading_start_time': '09:00'},
  96. 'SI': {'has_night_session': False, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 5, 'trading_start_time': '09:00'},
  97. 'SM': {'has_night_session': False, 'margin_rate': {'long': 0.15, 'short': 0.15}, 'multiplier': 5, 'trading_start_time': '09:00'},
  98. 'AO': {'has_night_session': True, 'margin_rate': {'long': 0.17, 'short': 0.17}, 'multiplier': 20, 'trading_start_time': '21:00'},
  99. 'TL': {'has_night_session': False, 'margin_rate': {'long': 0.045, 'short': 0.045}, 'multiplier': 1000000, 'trading_start_time': '09:00'},
  100. 'SF': {'has_night_session': False, 'margin_rate': {'long': 0.14, 'short': 0.14}, 'multiplier': 5, 'trading_start_time': '09:00'},
  101. 'PR': {'has_night_session': True, 'margin_rate': {'long': 0.12, 'short': 0.12}, 'multiplier': 15, 'trading_start_time': '21:00'},
  102. 'TF': {'has_night_session': False, 'margin_rate': {'long': 0.022, 'short': 0.022}, 'multiplier': 1000000, 'trading_start_time': '09:00'},
  103. 'BZ': {'has_night_session': False, 'margin_rate': {'long': 0.13, 'short': 0.13}, 'multiplier': 30, 'trading_start_time': '09:00'},
  104. }
  105. def extract_symbol_code(target_str):
  106. """
  107. 从标的字段提取品种代码
  108. 例如: '原油2002(SC2002.XINE)' -> 'SC'
  109. """
  110. match = re.search(r'\(([A-Z]+)\d+\.', target_str)
  111. if match:
  112. return match.group(1)
  113. return None
  114. DETAIL_KEYS = ['prev_close', 'open', 'MA5', 'MA10', 'MA20', 'MA30', 'MA60', 'AVG5']
  115. def parse_details(details_str):
  116. """
  117. 将details列拆解成独立字段
  118. """
  119. if pd.isna(details_str):
  120. return {}
  121. result = {}
  122. for part in str(details_str).split('|'):
  123. if ':' not in part:
  124. continue
  125. key, value = part.split(':', 1)
  126. key = key.strip()
  127. raw_value = value.strip()
  128. is_percent = False
  129. if raw_value.endswith('%'):
  130. is_percent = True
  131. raw_value = raw_value[:-1]
  132. try:
  133. numeric_value = float(raw_value)
  134. if is_percent:
  135. numeric_value /= 100
  136. result[key] = numeric_value
  137. except (ValueError, TypeError):
  138. result[key] = np.nan
  139. return result
  140. def infer_trade_direction(trade_type):
  141. """
  142. 根据交易类型推断方向(多/空)
  143. """
  144. if not isinstance(trade_type, str):
  145. return 'unknown'
  146. lowered = trade_type.lower()
  147. if '多' in trade_type or 'long' in lowered:
  148. return 'long'
  149. if '空' in trade_type or 'short' in lowered:
  150. return 'short'
  151. return 'unknown'
  152. def evaluate_trend_alignment(row):
  153. """
  154. 基于MA60、成交价、open与交易方向判断趋势是否一致
  155. """
  156. direction = infer_trade_direction(row.get('交易类型'))
  157. ma60 = row.get('MA60')
  158. trade_price = row.get('成交价')
  159. open_price = row.get('open')
  160. if direction == 'unknown' or pd.isna(ma60) or pd.isna(trade_price) or pd.isna(open_price):
  161. return '数据不足'
  162. if direction == 'long':
  163. return '一致' if trade_price >= ma60 and open_price >= ma60 else '不一致'
  164. if direction == 'short':
  165. return '一致' if trade_price <= ma60 and open_price <= ma60 else '不一致'
  166. return '数据不足'
  167. def calculate_ma_compaction(row, ma_columns):
  168. """
  169. 计算多条均线的聚合度:标准差 / 均值
  170. """
  171. values = [row.get(col) for col in ma_columns]
  172. if any(pd.isna(v) for v in values):
  173. return np.nan
  174. mean_val = np.mean(values)
  175. if mean_val == 0:
  176. return np.nan
  177. std_val = np.std(values, ddof=0)
  178. return std_val / mean_val
  179. def calculate_gap_ratio(prev_close, open_price):
  180. """
  181. 计算跳空比例
  182. """
  183. if pd.isna(prev_close) or prev_close == 0 or pd.isna(open_price):
  184. return np.nan
  185. return abs(open_price - prev_close) / abs(prev_close)
  186. def calculate_relative_gap(prev_close, open_price, avg_5day_change):
  187. """
  188. 计算相对跳空比例:跳空比率 / avg_5day_change
  189. """
  190. base_gap = calculate_gap_ratio(prev_close, open_price)
  191. if pd.isna(base_gap) or pd.isna(avg_5day_change) or avg_5day_change == 0:
  192. return np.nan
  193. return base_gap / abs(avg_5day_change)
  194. def assign_quantile_labels(df, source_col, target_col, q=4):
  195. """
  196. 根据分位数为连续指标打标签
  197. """
  198. labels = [f'Q{i+1}' for i in range(q)]
  199. valid = df[source_col].dropna()
  200. unique_count = valid.nunique()
  201. if valid.empty or unique_count < 2:
  202. df[target_col] = np.nan
  203. return
  204. bins = min(q, unique_count)
  205. quantiles = pd.qcut(valid, q=bins, labels=labels[:bins], duplicates='drop')
  206. df[target_col] = np.nan
  207. df.loc[quantiles.index, target_col] = quantiles.astype(str)
  208. def calculate_time_segment(order_time_str, trading_start_time_str):
  209. """
  210. 计算开仓时间相对于交易开始时间的时间段
  211. 返回: '<30分钟', '30-60分钟', '>1小时'
  212. """
  213. try:
  214. # 解析时间字符串
  215. order_time = datetime.strptime(order_time_str, '%H:%M:%S')
  216. start_time = datetime.strptime(trading_start_time_str, '%H:%M')
  217. # 如果是夜盘品种(21:00开盘),需要特殊处理跨日情况
  218. if trading_start_time_str == '21:00':
  219. # 如果委托时间在21:00之前,说明是第二天的交易时间
  220. if order_time.hour < 21 and order_time.hour >= 0:
  221. # 加24小时处理跨日
  222. order_time = order_time + timedelta(days=1)
  223. # 计算时间差(分钟)
  224. time_diff = (order_time - start_time).total_seconds() / 60
  225. # 处理负数情况(可能是跨日)
  226. if time_diff < 0:
  227. time_diff += 24 * 60
  228. # 分类
  229. if time_diff < 30:
  230. return '<30分钟'
  231. elif time_diff < 60:
  232. return '30-60分钟'
  233. else:
  234. return '>1小时'
  235. except Exception as e:
  236. print(f"时间计算错误: {order_time_str}, {trading_start_time_str}, {e}")
  237. return '未知'
  238. def calculate_session_type(order_time_str, has_night_session):
  239. """
  240. 计算交易时段类型:夜盘、上午、下午
  241. """
  242. try:
  243. order_time = datetime.strptime(order_time_str, '%H:%M:%S')
  244. hour = order_time.hour
  245. if has_night_session and (hour >= 21 or hour < 3):
  246. return '夜盘'
  247. elif 9 <= hour < 12:
  248. return '上午'
  249. elif 12 <= hour < 16:
  250. return '下午'
  251. else:
  252. return '其他'
  253. except:
  254. return '未知'
  255. def load_and_preprocess_data(csv_path):
  256. """
  257. 加载并预处理数据
  258. """
  259. print("正在加载数据...")
  260. df = pd.read_csv(csv_path)
  261. print(f"原始数据行数: {len(df)}")
  262. print(f"数据列: {df.columns.tolist()}")
  263. # 解析details列,补充所需字段
  264. if 'details' in df.columns:
  265. details_df = df['details'].apply(parse_details).apply(pd.Series)
  266. for key in DETAIL_KEYS:
  267. if key in details_df.columns:
  268. df[key] = details_df[key]
  269. elif key not in df.columns:
  270. df[key] = np.nan
  271. else:
  272. for key in DETAIL_KEYS:
  273. if key not in df.columns:
  274. df[key] = np.nan
  275. if '成交价' not in df.columns:
  276. df['成交价'] = np.nan
  277. avg5_col = 'avg_5day_change'
  278. if avg5_col not in df.columns:
  279. if 'AVG5' in df.columns:
  280. df[avg5_col] = df['AVG5']
  281. else:
  282. df[avg5_col] = np.nan
  283. else:
  284. if 'AVG5' in df.columns:
  285. df[avg5_col] = df[avg5_col].fillna(df['AVG5'])
  286. # 提取品种代码
  287. df['品种代码'] = df['标的'].apply(extract_symbol_code)
  288. # 获取品种配置信息
  289. df['trading_start_time'] = df['品种代码'].apply(
  290. lambda x: FUTURES_CONFIG.get(x, {}).get('trading_start_time', None)
  291. )
  292. df['has_night_session'] = df['品种代码'].apply(
  293. lambda x: FUTURES_CONFIG.get(x, {}).get('has_night_session', False)
  294. )
  295. # 计算开盘后时间段
  296. df['开盘后时间段'] = df.apply(
  297. lambda row: calculate_time_segment(row['委托时间'], row['trading_start_time'])
  298. if pd.notna(row['trading_start_time']) else '未知',
  299. axis=1
  300. )
  301. # 计算交易时段
  302. df['交易时段'] = df.apply(
  303. lambda row: calculate_session_type(row['委托时间'], row['has_night_session']),
  304. axis=1
  305. )
  306. # 计算保证金收益率
  307. df['保证金收益率'] = (df['交易盈亏'] / df['保证金']) * 100
  308. # 计算穿越均线数量
  309. df['穿越均线数量'] = df['crossed_ma_lines'].apply(
  310. lambda x: len(x.split(';')) if pd.notna(x) else 0
  311. )
  312. # 判断是否盈利
  313. df['是否盈利'] = df['交易盈亏'] > 0
  314. # 成交额分组
  315. df['成交额分组'] = pd.cut(df['成交额'],
  316. bins=[0, 100000, 200000, 500000, float('inf')],
  317. labels=['<10万', '10-20万', '20-50万', '>50万'])
  318. # 趋势一致性与衍生指标
  319. df['趋势一致'] = df.apply(evaluate_trend_alignment, axis=1)
  320. df['均线聚合度_5_10_20_30'] = df.apply(
  321. lambda row: calculate_ma_compaction(row, ['MA5', 'MA10', 'MA20', 'MA30']), axis=1
  322. )
  323. df['均线聚合度_5_10_20'] = df.apply(
  324. lambda row: calculate_ma_compaction(row, ['MA5', 'MA10', 'MA20']), axis=1
  325. )
  326. assign_quantile_labels(df, '均线聚合度_5_10_20_30', '均线聚合度_5_10_20_30_分位')
  327. assign_quantile_labels(df, '均线聚合度_5_10_20', '均线聚合度_5_10_20_分位')
  328. df['跳空比率'] = df.apply(
  329. lambda row: calculate_gap_ratio(row.get('prev_close'), row.get('open')), axis=1
  330. )
  331. df['跳空相对波动'] = df.apply(
  332. lambda row: calculate_relative_gap(row.get('prev_close'), row.get('open'), row.get('avg_5day_change')),
  333. axis=1
  334. )
  335. assign_quantile_labels(df, '跳空相对波动', '跳空相对波动分位')
  336. print(f"预处理后数据行数: {len(df)}")
  337. print(f"品种代码提取成功率: {df['品种代码'].notna().sum() / len(df) * 100:.2f}%")
  338. return df
  339. def calculate_statistics(group_df):
  340. """
  341. 计算统计指标
  342. """
  343. total_count = len(group_df)
  344. win_count = (group_df['交易盈亏'] > 0).sum()
  345. win_rate = win_count / total_count if total_count > 0 else 0
  346. avg_profit_loss = group_df['交易盈亏'].mean()
  347. # 计算盈亏比
  348. profit_trades = group_df[group_df['交易盈亏'] > 0]['交易盈亏']
  349. loss_trades = group_df[group_df['交易盈亏'] <= 0]['交易盈亏']
  350. avg_profit = profit_trades.mean() if len(profit_trades) > 0 else 0
  351. avg_loss = abs(loss_trades.mean()) if len(loss_trades) > 0 else 0
  352. profit_loss_ratio = avg_profit / avg_loss if avg_loss > 0 else np.inf
  353. avg_margin_return = group_df['保证金收益率'].mean()
  354. return pd.Series({
  355. '出现次数': total_count,
  356. '胜率': win_rate,
  357. '平均盈亏': avg_profit_loss,
  358. '盈亏比': profit_loss_ratio,
  359. '平均保证金收益率': avg_margin_return
  360. })
  361. def analyze_ma_lines(df):
  362. """
  363. 分析crossed_ma_lines维度
  364. """
  365. print("\n" + "="*80)
  366. print("均线组合分析")
  367. print("="*80)
  368. ma_stats = df.groupby('crossed_ma_lines').apply(calculate_statistics).round(4)
  369. ma_stats = ma_stats.sort_values('出现次数', ascending=False)
  370. print(ma_stats.to_string())
  371. return ma_stats
  372. def analyze_time_segment(df):
  373. """
  374. 分析开盘后时间段维度
  375. """
  376. print("\n" + "="*80)
  377. print("开盘后时间段分析")
  378. print("="*80)
  379. time_stats = df.groupby('开盘后时间段').apply(calculate_statistics).round(4)
  380. # 按指定顺序排列
  381. order = ['<30分钟', '30-60分钟', '>1小时', '未知']
  382. time_stats = time_stats.reindex([o for o in order if o in time_stats.index])
  383. print(time_stats.to_string())
  384. return time_stats
  385. def analyze_cross_dimension(df):
  386. """
  387. 交叉分析:均线组合 × 开盘后时间段
  388. """
  389. print("\n" + "="*80)
  390. print("交叉分析:均线组合 × 开盘后时间段")
  391. print("="*80)
  392. # 样本量分布
  393. cross_count = pd.crosstab(df['crossed_ma_lines'], df['开盘后时间段'])
  394. print("\n样本量分布:")
  395. print(cross_count.to_string())
  396. # 胜率对比
  397. cross_winrate = pd.crosstab(
  398. df['crossed_ma_lines'],
  399. df['开盘后时间段'],
  400. values=df['是否盈利'],
  401. aggfunc='mean'
  402. ).round(4)
  403. print("\n胜率对比:")
  404. print(cross_winrate.to_string())
  405. # 平均盈亏
  406. cross_profit = pd.crosstab(
  407. df['crossed_ma_lines'],
  408. df['开盘后时间段'],
  409. values=df['交易盈亏'],
  410. aggfunc='mean'
  411. ).round(2)
  412. print("\n平均盈亏:")
  413. print(cross_profit.to_string())
  414. # 平均保证金收益率
  415. cross_return = pd.crosstab(
  416. df['crossed_ma_lines'],
  417. df['开盘后时间段'],
  418. values=df['保证金收益率'],
  419. aggfunc='mean'
  420. ).round(4)
  421. print("\n平均保证金收益率(%):")
  422. print(cross_return.to_string())
  423. return cross_count, cross_winrate, cross_profit, cross_return
  424. def analyze_trade_type_and_variety(df):
  425. """
  426. 分析交易类型和品种维度
  427. """
  428. print("\n" + "="*80)
  429. print("交易类型分析")
  430. print("="*80)
  431. trade_type_stats = df.groupby('交易类型').apply(calculate_statistics).round(4)
  432. print(trade_type_stats.to_string())
  433. print("\n" + "="*80)
  434. print("品种类型分析")
  435. print("="*80)
  436. variety_stats = df.groupby('品种').apply(calculate_statistics).round(4)
  437. print(variety_stats.to_string())
  438. print("\n" + "="*80)
  439. print("具体品种代码分析(前20名)")
  440. print("="*80)
  441. symbol_stats = df.groupby('品种代码').apply(calculate_statistics).round(4)
  442. symbol_stats = symbol_stats.sort_values('出现次数', ascending=False).head(20)
  443. print(symbol_stats.to_string())
  444. return trade_type_stats, variety_stats, symbol_stats
  445. def analyze_additional_dimensions(df):
  446. """
  447. 其他维度分析
  448. """
  449. print("\n" + "="*80)
  450. print("成交额分组分析")
  451. print("="*80)
  452. amount_stats = df.groupby('成交额分组').apply(calculate_statistics).round(4)
  453. print(amount_stats.to_string())
  454. print("\n" + "="*80)
  455. print("交易时段分析")
  456. print("="*80)
  457. session_stats = df.groupby('交易时段').apply(calculate_statistics).round(4)
  458. print(session_stats.to_string())
  459. print("\n" + "="*80)
  460. print("穿越均线数量分析")
  461. print("="*80)
  462. ma_count_stats = df.groupby('穿越均线数量').apply(calculate_statistics).round(4)
  463. print(ma_count_stats.to_string())
  464. print("\n" + "="*80)
  465. print("多空对比(按均线组合)- 前10个组合")
  466. print("="*80)
  467. # 获取出现次数最多的前10个均线组合
  468. top_ma_lines = df['crossed_ma_lines'].value_counts().head(10).index
  469. df_top = df[df['crossed_ma_lines'].isin(top_ma_lines)]
  470. long_short_stats = df_top.groupby(['crossed_ma_lines', '交易类型']).apply(
  471. calculate_statistics
  472. ).round(4)
  473. print(long_short_stats.to_string())
  474. print("\n" + "="*80)
  475. print("品种特性分析(有夜盘 vs 无夜盘)")
  476. print("="*80)
  477. night_session_stats = df.groupby('has_night_session').apply(calculate_statistics).round(4)
  478. night_session_stats.index = ['无夜盘', '有夜盘']
  479. print(night_session_stats.to_string())
  480. print("\n" + "="*80)
  481. print("组合策略分析:最佳组合(样本量>=10)")
  482. print("="*80)
  483. # 三维组合分析
  484. combo_stats = df.groupby(['crossed_ma_lines', '开盘后时间段', '交易类型']).apply(
  485. calculate_statistics
  486. ).round(4)
  487. # 筛选样本量>=10的组合
  488. combo_stats = combo_stats[combo_stats['出现次数'] >= 10]
  489. # 按保证金收益率排序,显示前10
  490. combo_stats_sorted = combo_stats.sort_values('平均保证金收益率', ascending=False).head(10)
  491. print("\n保证金收益率最高的10个组合:")
  492. print(combo_stats_sorted.to_string())
  493. # 按胜率排序,显示前10
  494. combo_stats_sorted_winrate = combo_stats.sort_values('胜率', ascending=False).head(10)
  495. print("\n胜率最高的10个组合:")
  496. print(combo_stats_sorted_winrate.to_string())
  497. return {
  498. 'amount_stats': amount_stats,
  499. 'session_stats': session_stats,
  500. 'ma_count_stats': ma_count_stats,
  501. 'long_short_stats': long_short_stats,
  502. 'night_session_stats': night_session_stats,
  503. 'combo_stats': combo_stats
  504. }
  505. def analyze_trend_alignment(df):
  506. """
  507. 趋势一致性分析
  508. """
  509. print("\n" + "="*80)
  510. print("趋势一致性分析(基于MA60 vs 成交价/open)")
  511. print("="*80)
  512. if df['趋势一致'].dropna().empty:
  513. print("暂无可用数据")
  514. return None
  515. trend_stats = df.groupby('趋势一致').apply(calculate_statistics).round(4)
  516. print(trend_stats.to_string())
  517. return trend_stats
  518. def analyze_ma_compaction(df):
  519. """
  520. 均线聚合度分析
  521. """
  522. print("\n" + "="*80)
  523. print("均线聚合度分析(标准差/均值)")
  524. print("="*80)
  525. compaction_results = {}
  526. config = [
  527. ('均线聚合度_5_10_20_30', 'MA5/MA10/MA20/MA30', '均线聚合度_5_10_20_30_分位', 'ma_compaction_ma5_ma30'),
  528. ('均线聚合度_5_10_20', 'MA5/MA10/MA20', '均线聚合度_5_10_20_分位', 'ma_compaction_ma5_ma20')
  529. ]
  530. for col, label, quantile_col, result_key in config:
  531. print(f"\n--- {label} ---")
  532. if col not in df.columns or df[col].dropna().empty:
  533. print("数据不足,无法分析。")
  534. compaction_results[result_key] = None
  535. continue
  536. print(f"{label} 描述统计:均值={df[col].mean():.4f}, 中位数={df[col].median():.4f}, 最大值={df[col].max():.4f}")
  537. if quantile_col not in df.columns or df[quantile_col].dropna().empty:
  538. print("分位标签缺失,跳过统计。")
  539. compaction_results[result_key] = None
  540. continue
  541. stats = df.groupby(quantile_col).apply(calculate_statistics).round(4)
  542. print(stats.to_string())
  543. compaction_results[result_key] = stats
  544. return compaction_results
  545. def analyze_gap_behavior(df):
  546. """
  547. 跳空行为分析
  548. """
  549. print("\n" + "="*80)
  550. print("跳空行为分析")
  551. print("="*80)
  552. if '跳空比率' not in df.columns or df['跳空比率'].dropna().empty:
  553. print("缺少跳空数据,无法分析。")
  554. return None
  555. print(f"跳空比率描述:均值={df['跳空比率'].mean():.4f}, 最大值={df['跳空比率'].max():.4f}")
  556. if '跳空相对波动' in df.columns and not df['跳空相对波动'].dropna().empty:
  557. print(f"跳空相对波动描述:均值={df['跳空相对波动'].mean():.4f}, 最大值={df['跳空相对波动'].max():.4f}")
  558. if '跳空相对波动分位' not in df.columns or df['跳空相对波动分位'].dropna().empty:
  559. print("跳空相对波动分位标签缺失,跳过分组统计。")
  560. return None
  561. gap_stats = df.groupby('跳空相对波动分位').apply(calculate_statistics).round(4)
  562. print("\n按跳空相对波动分位的表现:")
  563. print(gap_stats.to_string())
  564. return gap_stats
  def analyze_enhanced_cross_metrics(df):
      """Cross the added indicators with the two core dimensions.

      Each indicator column (trend alignment, the two MA-compaction quantile
      labels, the gap-relative-volatility quantile label) is paired with the
      MA-combination column and with the time-since-open column. For every
      pair the records are grouped by ``[dimension, indicator]`` and
      ``calculate_statistics`` is applied per group.

      Returns:
          dict mapping a result key such as ``'trend_vs_ma'`` to the grouped
          statistics rounded to 4 decimals, or to ``None`` when a required
          column is missing or the indicator column has no non-null values.
      """
      banner = "=" * 80
      print("\n" + banner)
      print("扩展指标交叉分析(趋势一致/均线聚合度/跳空 vs 核心维度)")
      print(banner)

      # (indicator column, label used in the printed title, result-key prefix)
      indicators = (
          ('趋势一致', '趋势一致', 'trend'),
          ('均线聚合度_5_10_20_30_分位', '均线聚合度(4)', 'ma_compact4'),
          ('均线聚合度_5_10_20_分位', '均线聚合度(3)', 'ma_compact3'),
          ('跳空相对波动分位', '跳空相对波动', 'gap'),
      )
      # (dimension column, label used in the printed title, result-key suffix)
      dimensions = (
          ('crossed_ma_lines', '均线组合', 'ma'),
          ('开盘后时间段', '开盘后时间段', 'time'),
      )

      results = {}
      for indicator_col, indicator_label, prefix in indicators:
          for dimension_col, dimension_label, suffix in dimensions:
              title = f"{indicator_label} × {dimension_label}"
              key = f"{prefix}_vs_{suffix}"
              group_cols = [dimension_col, indicator_col]
              # Stays None unless the statistics are actually computed below.
              results[key] = None

              absent = [name for name in group_cols if name not in df.columns]
              if absent:
                  print(f"\n{title}: 缺少列 {absent},跳过。")
                  continue
              if df[indicator_col].dropna().empty:
                  print(f"\n{title}: 数据不足,跳过。")
                  continue

              table = df.groupby(group_cols).apply(calculate_statistics).round(4)
              print(f"\n{title}")
              print(table.to_string())
              results[key] = table
      return results
  def _annotate_bars(ax, bars, formatter=lambda v: f"{v:.0f}", offset_ratio=0.01, horizontal=True):
      """Write each bar's value as text just beyond the end of the bar.

      The gap between bar and label is ``offset_ratio`` times the largest
      finite absolute bar length, so it scales with the plotted data. A fixed
      gap in data units would throw labels far outside the axes on ratio-scaled
      charts such as win rate. Bars whose length is NaN or infinite get no label
      and do not influence the gap.

      Args:
          ax: Axes the bars were drawn on.
          bars: Container returned by ``ax.bar`` / ``ax.barh``; may be empty
              or ``None``.
          formatter: Callable turning a bar length into its label text.
          offset_ratio: Gap as a fraction of the longest bar.
          horizontal: ``True`` for ``barh`` bars (length is the width),
              ``False`` for ``bar`` bars (length is the height).
      """
      if bars is None or len(bars) == 0:
          return

      lengths = [bar.get_width() if horizontal else bar.get_height() for bar in bars]
      magnitudes = [abs(length) for length in lengths if np.isfinite(length)]
      offset = max(magnitudes, default=0) * offset_ratio

      for bar, length in zip(bars, lengths):
          if not np.isfinite(length):
              continue
          negative = length < 0
          # Negative bars grow the other way, so the label goes on the far side.
          tip = length - offset if negative else length + offset
          label = formatter(length)
          if horizontal:
              ax.text(tip, bar.get_y() + bar.get_height() / 2, label,
                      va='center', ha='right' if negative else 'left', fontsize=9)
          else:
              ax.text(bar.get_x() + bar.get_width() / 2, tip, label,
                      va='top' if negative else 'bottom', ha='center', fontsize=9)


  def _save_figure(fig, output_dir, filename, rect=None):
      """Tighten the layout, write ``fig`` to ``output_dir/filename``, close it.

      ``rect`` is forwarded to ``tight_layout`` when given; it is used to keep
      room for a figure-level title.
      """
      if rect is None:
          fig.tight_layout()
      else:
          fig.tight_layout(rect=rect)
      path = os.path.join(output_dir, filename)
      fig.savefig(path, dpi=150, bbox_inches='tight')
      print(f"已保存: {path}")
      plt.close(fig)


  def _draw_heatmap(ax, data, title, xlabel, ylabel, fmt='.2f', center=None,
                    vmin=None, vmax=None, cbar_label=''):
      """Draw an annotated red-yellow-green heatmap, or a placeholder if empty.

      An empty frame is not passed to seaborn (there is no cell to derive a
      colour range from); the axes are switched off and the title is suffixed
      with a "not enough data" marker instead.
      """
      if data.empty:
          ax.axis('off')
          ax.set_title(f"{title}(数据不足)")
          return
      sns.heatmap(
          data,
          annot=True,
          fmt=fmt,
          cmap='RdYlGn',
          center=center,
          vmin=vmin,
          vmax=vmax,
          ax=ax,
          cbar_kws={'label': cbar_label},
      )
      ax.set_title(title)
      ax.set_xlabel(xlabel)
      ax.set_ylabel(ylabel)


  def _build_metric_pivot(source_df, row_field, col_field, value_field, row_order=None, col_order=None):
      """Pivot the mean of ``value_field`` by ``row_field`` x ``col_field``.

      Rows with a null ``col_field`` are dropped first. When ``row_order`` /
      ``col_order`` are given, only the listed labels that actually occur are
      kept, in the listed order.
      """
      filtered = source_df.dropna(subset=[col_field])
      if row_order is not None:
          filtered = filtered[filtered[row_field].isin(row_order)]
      pivot = pd.pivot_table(
          filtered,
          index=row_field,
          columns=col_field,
          values=value_field,
          aggfunc='mean',
      )
      if row_order is not None:
          pivot = pivot.reindex([label for label in row_order if label in pivot.index])
      if col_order is not None:
          pivot = pivot[[label for label in col_order if label in pivot.columns]]
      return pivot


  def _plot_ma_overview(ma_stats, output_dir):
      """Chart 1: four horizontal bar panels for the first 15 rows of ``ma_stats``."""
      top_ma = ma_stats.head(15)
      positions = range(len(top_ma))
      fig, axes = plt.subplots(2, 2, figsize=(16, 12))

      # (column, x label, title, threshold, label formatter, label gap ratio).
      # The threshold is both the green/red cut-off and the dashed reference
      # line; None means default colours and no reference line.
      panels = (
          ('出现次数', '出现次数', '均线组合出现次数(Top 15)', None,
           lambda v: f"{v:.0f}", 0.01),
          ('胜率', '胜率', '均线组合胜率(Top 15)', 0.5,
           lambda v: f"{v:.1%}", 0.02),
          ('平均盈亏', '平均盈亏(元)', '均线组合平均盈亏(Top 15)', 0,
           lambda v: f"{v:,.0f}", 0.015),
          ('平均保证金收益率', '平均保证金收益率(%)', '均线组合平均保证金收益率(Top 15)', 0,
           lambda v: f"{v:.2f}%", 0.02),
      )
      for ax, (column, xlabel, title, threshold, formatter, ratio) in zip(axes.flat, panels):
          values = top_ma[column]
          if threshold is None:
              bars = ax.barh(positions, values)
          else:
              colors = ['green' if value > threshold else 'red' for value in values]
              bars = ax.barh(positions, values, color=colors)
          ax.set_yticks(positions)
          ax.set_yticklabels(top_ma.index)
          ax.set_xlabel(xlabel)
          ax.set_title(title)
          if threshold is not None:
              ax.axvline(x=threshold, color='black', linestyle='--', alpha=0.5)
          # Put the first row at the top.
          ax.invert_yaxis()
          _annotate_bars(ax, bars, formatter=formatter, offset_ratio=ratio, horizontal=True)

      _save_figure(fig, output_dir, 'ma_lines_analysis.png')


  def _plot_time_segments(time_stats, output_dir):
      """Chart 2: average profit and average margin return per time segment."""
      order = ['<30分钟', '30-60分钟', '>1小时']
      segments = time_stats.loc[[label for label in order if label in time_stats.index]]
      positions = range(len(segments))
      fig, axes = plt.subplots(1, 2, figsize=(14, 6))

      panels = (
          ('平均盈亏', '平均盈亏(元)', '不同时间段平均盈亏', lambda v: f"{v:,.0f}"),
          ('平均保证金收益率', '平均保证金收益率(%)', '不同时间段平均保证金收益率', lambda v: f"{v:.2f}%"),
      )
      for ax, (column, ylabel, title, formatter) in zip(axes, panels):
          values = segments[column]
          colors = ['green' if value >= 0 else 'red' for value in values]
          bars = ax.bar(positions, values, color=colors)
          ax.set_xticks(positions)
          ax.set_xticklabels(segments.index)
          ax.set_ylabel(ylabel)
          ax.set_title(title)
          ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)
          _annotate_bars(ax, bars, formatter=formatter, horizontal=False)

      _save_figure(fig, output_dir, 'time_segment_analysis.png')


  def _plot_cross_heatmaps(ma_stats, cross_winrate, cross_profit, cross_return, output_dir):
      """Chart 3: MA combination x time segment heatmaps for three metrics."""
      top_ma_lines = ma_stats.head(15).index
      heatmap_cols = ['<30分钟', '30-60分钟', '>1小时']

      def prepare(table):
          # Keep only the top combinations present in the table, then the
          # known time-segment columns (if none are present, keep all columns).
          subset = table.reindex(index=[idx for idx in top_ma_lines if idx in table.index])
          if subset.empty:
              return subset
          cols = [col for col in heatmap_cols if col in subset.columns]
          return subset[cols] if cols else subset

      fig, axes = plt.subplots(1, 3, figsize=(22, 10))
      # (table, title, fmt, center, vmin, vmax, colour-bar label)
      panels = (
          (cross_winrate, '均线组合 × 时间段 胜率热力图(Top 15)', '.2f', 0.5, 0, 1, '胜率'),
          (cross_profit, '均线组合 × 时间段 平均盈亏热力图(Top 15)', '.0f', 0, None, None, '平均盈亏(元)'),
          (cross_return, '均线组合 × 时间段 平均保证金收益率热力图(Top 15)', '.2f', 0, None, None,
           '平均保证金收益率(%)'),
      )
      for ax, (table, title, fmt, center, vmin, vmax, cbar_label) in zip(axes, panels):
          _draw_heatmap(
              ax, prepare(table), title, '开盘后时间段', '均线组合',
              fmt=fmt, center=center, vmin=vmin, vmax=vmax, cbar_label=cbar_label,
          )

      _save_figure(fig, output_dir, 'cross_analysis_heatmap.png')


  def _plot_trade_type_and_variety(df, output_dir):
      """Chart 4: win rate and margin return by trade type and by variety."""
      fig, axes = plt.subplots(2, 2, figsize=(16, 12))

      def draw(ax, stats, column, ylabel, title, baseline, **bar_kwargs):
          ax.bar(stats.index, stats[column], **bar_kwargs)
          ax.set_ylabel(ylabel)
          ax.set_title(title)
          ax.axhline(y=baseline, color='black', linestyle='--', alpha=0.5)

      trade_type_stats = df.groupby('交易类型').apply(calculate_statistics)
      draw(axes[0, 0], trade_type_stats, '胜率', '胜率', '交易类型胜率对比', 0.5,
           color=['green', 'red'])
      draw(axes[0, 1], trade_type_stats, '平均保证金收益率', '平均保证金收益率(%)',
           '交易类型保证金收益率对比', 0, color=['green', 'red'])

      variety_stats = df.groupby('品种').apply(calculate_statistics)
      draw(axes[1, 0], variety_stats, '胜率', '胜率', '品种类型胜率对比', 0.5)
      draw(axes[1, 1], variety_stats, '平均保证金收益率', '平均保证金收益率(%)',
           '品种类型保证金收益率对比', 0)

      _save_figure(fig, output_dir, 'variety_analysis.png')


  def _plot_enhanced_metric_heatmaps(df, ma_stats, output_dir):
      """Chart 5: one 2x3 heatmap grid per extended indicator that has data."""
      # (indicator column, output file, display title, explicit column order)
      enhanced_metric_configs = [
          ('趋势一致', 'trend_alignment_cross.png', '趋势一致', ['一致', '不一致', '数据不足']),
          ('均线聚合度_5_10_20_30_分位', 'ma_compaction_4lines_cross.png', '均线聚合度(MA5/MA10/MA20/MA30)', None),
          ('均线聚合度_5_10_20_分位', 'ma_compaction_3lines_cross.png', '均线聚合度(MA5/MA10/MA20)', None),
          ('跳空相对波动分位', 'gap_behavior_cross.png', '跳空相对波动', None),
      ]
      # (row column, row label, row labels to keep, in order)
      row_configs = [
          ('crossed_ma_lines', 'Top 15 均线组合', list(ma_stats.head(15).index)),
          ('开盘后时间段', '开盘后时间段', ['<30分钟', '30-60分钟', '>1小时', '未知']),
      ]
      # (value column, value label, fmt, center, vmin, vmax, colour-bar label)
      value_configs = [
          ('是否盈利', '胜率', '.2f', 0.5, 0, 1, '胜率'),
          ('交易盈亏', '平均盈亏', '.0f', 0, None, None, '平均盈亏(元)'),
          ('保证金收益率', '平均保证金收益率', '.2f', 0, None, None, '平均保证金收益率(%)'),
      ]

      for metric_field, filename, metric_title, col_order in enhanced_metric_configs:
          if metric_field not in df.columns or df[metric_field].dropna().empty:
              continue

          fig, axes = plt.subplots(len(row_configs), len(value_configs), figsize=(22, 12))
          for axes_row, (row_field, row_label, row_order) in zip(axes, row_configs):
              for ax, (value_field, value_label, fmt, center, vmin, vmax, cbar_label) in zip(
                      axes_row, value_configs):
                  pivot = _build_metric_pivot(
                      df, row_field, metric_field, value_field,
                      row_order=row_order, col_order=col_order,
                  )
                  _draw_heatmap(
                      ax, pivot, f"{row_label} - {value_label}", metric_title, row_label,
                      fmt=fmt, center=center, vmin=vmin, vmax=vmax, cbar_label=cbar_label,
                  )

          fig.suptitle(f"{metric_title} × 核心维度表现", fontsize=16)
          # Leave the top 3% of the canvas for the figure title.
          _save_figure(fig, output_dir, filename, rect=[0, 0, 1, 0.97])


  def create_visualizations(df, ma_stats, time_stats, cross_winrate, cross_profit, cross_return, output_dir):
      """Render every analysis chart as a PNG inside ``output_dir``.

      Files written: ``ma_lines_analysis.png``, ``time_segment_analysis.png``,
      ``cross_analysis_heatmap.png``, ``variety_analysis.png`` and one
      ``*_cross.png`` grid per extended indicator column that is present in
      ``df`` with at least one non-null value.

      Args:
          df: Record-level DataFrame. The trade-type and variety columns are
              always required; the extended grids also read the MA-combination,
              time-segment, win-flag, profit and margin-return columns.
          ma_stats: Per-MA-combination statistics; only the first 15 rows are
              plotted (presumably already sorted by the caller).
          time_stats: Per-time-segment statistics indexed by segment label.
          cross_winrate: MA combination x time segment table of win rate.
          cross_profit: Same layout, average profit.
          cross_return: Same layout, average margin return.
          output_dir: Target directory; created if it does not exist.
      """
      print("\n" + "="*80)
      print("生成可视化图表...")
      print("="*80)

      os.makedirs(output_dir, exist_ok=True)

      _plot_ma_overview(ma_stats, output_dir)
      _plot_time_segments(time_stats, output_dir)
      _plot_cross_heatmaps(ma_stats, cross_winrate, cross_profit, cross_return, output_dir)
      _plot_trade_type_and_variety(df, output_dir)
      _plot_enhanced_metric_heatmaps(df, ma_stats, output_dir)

      print("\n所有图表已生成!")
  def save_results_to_csv(df, ma_stats, time_stats, output_dir,
                          trend_alignment_stats=None, ma_compaction_stats=None,
                          gap_stats=None, enhanced_cross_stats=None):
      """Write the enriched records and every statistics table to CSV files.

      All files are written into ``output_dir`` (created if missing) using the
      ``utf-8-sig`` encoding. Per-symbol and per-strategy-combination
      statistics are computed here with ``calculate_statistics``; combinations
      with fewer than 5 occurrences are dropped and the rest are sorted by
      average margin return, descending.

      Args:
          df: Enriched record-level DataFrame; written without its index.
          ma_stats: Per-MA-combination statistics table.
          time_stats: Per-time-segment statistics table.
          output_dir: Directory receiving the CSV files.
          trend_alignment_stats: Optional table; skipped when ``None``.
          ma_compaction_stats: Optional dict of tables, each written as
              ``<key>_stats.csv``; ``None`` values are skipped.
          gap_stats: Optional table; skipped when ``None``.
          enhanced_cross_stats: Optional dict of tables, each written as
              ``<key>_stats.csv``; ``None`` values are skipped.
      """
      print("\n" + "="*80)
      print("保存分析结果到CSV...")
      print("="*80)

      # Do not depend on an earlier step having created the directory.
      os.makedirs(output_dir, exist_ok=True)

      def export(frame, filename, label, **to_csv_kwargs):
          output_file = os.path.join(output_dir, filename)
          frame.to_csv(output_file, encoding='utf-8-sig', **to_csv_kwargs)
          print(f"{label}: {output_file}")

      export(df, 'records_enhanced.csv', '已保存增强数据', index=False)
      export(ma_stats, 'ma_lines_stats.csv', '已保存均线组合统计')
      export(time_stats, 'time_segment_stats.csv', '已保存时间段统计')

      symbol_stats = df.groupby('品种代码').apply(calculate_statistics)
      export(symbol_stats, 'symbol_stats.csv', '已保存品种统计')

      combo_stats = df.groupby(['crossed_ma_lines', '开盘后时间段', '交易类型']).apply(
          calculate_statistics
      )
      combo_stats = combo_stats[combo_stats['出现次数'] >= 5]
      combo_stats = combo_stats.sort_values('平均保证金收益率', ascending=False)
      export(combo_stats, 'combo_strategy_stats.csv', '已保存组合策略统计')

      if trend_alignment_stats is not None:
          export(trend_alignment_stats, 'trend_alignment_stats.csv', '已保存趋势一致性统计')

      for key, stats in (ma_compaction_stats or {}).items():
          if stats is not None:
              export(stats, f'{key}_stats.csv', '已保存均线聚合度统计')

      if gap_stats is not None:
          export(gap_stats, 'gap_behavior_stats.csv', '已保存跳空行为统计')

      for key, stats in (enhanced_cross_stats or {}).items():
          if stats is not None:
              export(stats, f'{key}_stats.csv', f'已保存 {key} 交叉统计')
  def main():
      """Run the whole pipeline: load, analyse, plot, then export to CSV.

      Reads ``records.csv`` from the directory containing this script and
      writes results under ``data/future/analysis_results`` relative to the
      current working directory. Prints an error and returns early when the
      input file does not exist.
      """
      banner = "=" * 80

      script_dir = os.path.dirname(os.path.abspath(__file__))
      csv_path = os.path.join(script_dir, 'records.csv')
      output_dir = os.path.join('data', 'future', 'analysis_results')

      if not os.path.exists(csv_path):
          print(f"错误: 找不到文件 {csv_path}")
          return

      print(banner)
      print("期货开仓记录分析工具")
      print(banner)

      df = load_and_preprocess_data(csv_path)

      # Each analysis prints its own report; only results needed later are kept.
      ma_stats = analyze_ma_lines(df)
      time_stats = analyze_time_segment(df)
      _, cross_winrate, cross_profit, cross_return = analyze_cross_dimension(df)
      analyze_trade_type_and_variety(df)
      analyze_additional_dimensions(df)
      optional_stats = {
          'trend_alignment_stats': analyze_trend_alignment(df),
          'ma_compaction_stats': analyze_ma_compaction(df),
          'gap_stats': analyze_gap_behavior(df),
          'enhanced_cross_stats': analyze_enhanced_cross_metrics(df),
      }

      create_visualizations(df, ma_stats, time_stats, cross_winrate, cross_profit, cross_return, output_dir)

      save_results_to_csv(df, ma_stats, time_stats, output_dir, **optional_stats)

      print("\n" + banner)
      print("分析完成!")
      print(f"结果保存在: {output_dir}")
      print(banner)
  # Run the analysis pipeline only when this file is executed as a script,
  # not when it is imported as a module.
  if __name__ == '__main__':
      main()