transaction_pair_analysis.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868
  1. # 交易配对分析工具
  2. # 用于从交易记录CSV文件中为开仓/平仓交易进行配对,为每对关联交易分配相同ID
  3. import pandas as pd
  4. import numpy as np
  5. from datetime import datetime
  6. import re
  7. import os
  8. import warnings
  9. warnings.filterwarnings('ignore')
  10. def _get_current_directory():
  11. """
  12. 获取当前文件所在目录
  13. 返回:
  14. str: 当前目录路径
  15. """
  16. try:
  17. current_dir = os.path.dirname(os.path.abspath(__file__))
  18. except NameError:
  19. current_dir = os.getcwd()
  20. return current_dir
  21. def read_transaction_csv(csv_path):
  22. """
  23. 读取交易记录CSV文件,支持多种编码格式
  24. 参数:
  25. csv_path (str): CSV文件路径
  26. 返回:
  27. pandas.DataFrame: 包含交易记录的DataFrame
  28. """
  29. encodings = ['gbk', 'utf-8-sig', 'utf-8', 'gb2312', 'gb18030', 'latin1']
  30. for encoding in encodings:
  31. try:
  32. df = pd.read_csv(csv_path, encoding=encoding)
  33. print(f"成功使用 {encoding} 编码读取CSV文件")
  34. print(f"从CSV文件中读取到 {len(df)} 条记录")
  35. return df
  36. except UnicodeDecodeError:
  37. continue
  38. except Exception as e:
  39. if encoding == encodings[-1]:
  40. print(f"读取CSV文件时出错: {str(e)}")
  41. raise
  42. continue
  43. print(f"无法使用任何编码格式读取CSV文件: {csv_path}")
  44. return pd.DataFrame()
  45. def parse_transaction_data(df):
  46. """
  47. 解析交易数据,提取关键信息
  48. 参数:
  49. df (pandas.DataFrame): 原始交易数据
  50. 返回:
  51. pandas.DataFrame: 添加了解析字段的DataFrame
  52. """
  53. df = df.copy()
  54. # 提取标的(完整信息用于匹配)
  55. df['标的_完整'] = df['标的'].astype(str).str.strip()
  56. # 提取交易类型
  57. df['交易类型_标准'] = df['交易类型'].astype(str).str.strip()
  58. # 判断是开仓还是平仓
  59. df['仓位操作'] = df['交易类型_标准'].apply(lambda x: '开仓' if '开' in x else ('平仓' if '平' in x else '未知'))
  60. # 判断方向(多/空)
  61. df['方向'] = df['交易类型_标准'].apply(lambda x: '多' if '多' in x else ('空' if '空' in x else '未知'))
  62. # 从成交数量中提取数字(去掉"手"等单位)
  63. def extract_quantity(qty_str):
  64. """从成交数量字符串中提取数字"""
  65. try:
  66. qty_str = str(qty_str).strip()
  67. # 使用正则提取数字(包括负号和小数点)
  68. match = re.search(r'(-?\d+(?:\.\d+)?)', qty_str)
  69. if match:
  70. return abs(float(match.group(1))) # 返回绝对值
  71. return 0
  72. except:
  73. return 0
  74. df['成交数量_数值'] = df['成交数量'].apply(extract_quantity)
  75. # 合并日期和时间为完整时间戳用于排序
  76. def parse_datetime(row):
  77. """解析日期和时间"""
  78. try:
  79. date_str = str(row['日期']).strip()
  80. time_str = str(row['委托时间']).strip()
  81. datetime_str = f"{date_str} {time_str}"
  82. return pd.to_datetime(datetime_str)
  83. except:
  84. return pd.NaT
  85. df['交易时间'] = df.apply(parse_datetime, axis=1)
  86. # 过滤掉无效记录
  87. df = df[df['成交数量_数值'] > 0].copy()
  88. df = df[df['仓位操作'] != '未知'].copy()
  89. df = df[df['方向'] != '未知'].copy()
  90. df = df[~df['交易时间'].isna()].copy()
  91. print(f"解析后有效记录: {len(df)} 条")
  92. return df
  93. def fix_incomplete_pairs(df):
  94. """
  95. 修复不完整的配对(配对ID只出现一次的情况)
  96. 参数:
  97. df (pandas.DataFrame): 已配对的交易数据
  98. 返回:
  99. pandas.DataFrame: 修复后的DataFrame
  100. """
  101. df = df.copy()
  102. max_iterations = 10 # 最大迭代次数,防止无限循环
  103. iteration = 0
  104. print(f"\n开始修复不完整配对...")
  105. while iteration < max_iterations:
  106. iteration += 1
  107. # 统计每个配对ID出现的次数(排除"未配对"和空值)
  108. paired_mask = (df['交易对ID'] != '') & (df['交易对ID'] != '未配对')
  109. paired_df = df[paired_mask]
  110. if len(paired_df) == 0:
  111. break
  112. # 统计每个配对ID出现的次数
  113. pair_id_counts = paired_df['交易对ID'].value_counts()
  114. # 找出只出现一次的配对ID(不完整配对)
  115. incomplete_pair_ids = pair_id_counts[pair_id_counts == 1].index.tolist()
  116. if len(incomplete_pair_ids) == 0:
  117. print(f" 迭代 {iteration}: 没有发现不完整配对,修复完成")
  118. break
  119. print(f" 迭代 {iteration}: 发现 {len(incomplete_pair_ids)} 个不完整配对")
  120. fixed_count = 0
  121. # 处理每个不完整配对
  122. for pair_id in incomplete_pair_ids:
  123. # 找到这个配对ID对应的交易
  124. pair_mask = df['交易对ID'] == pair_id
  125. incomplete_trade = df[pair_mask].iloc[0]
  126. target = incomplete_trade['标的_完整']
  127. direction = incomplete_trade['方向']
  128. operation = incomplete_trade['仓位操作'] # 开仓或平仓
  129. trade_time = incomplete_trade['交易时间']
  130. trade_qty = incomplete_trade['成交数量_数值']
  131. # 查找可匹配的未配对交易
  132. # 条件:相同标的、相同方向、配对ID为空或"未配对"
  133. unpaired_mask = (
  134. (df['标的_完整'] == target) &
  135. (df['方向'] == direction) &
  136. ((df['交易对ID'] == '') | (df['交易对ID'] == '未配对')) # 未配对
  137. )
  138. unpaired_trades = df[unpaired_mask].copy()
  139. if len(unpaired_trades) == 0:
  140. continue
  141. # 根据不完整交易的类型,查找匹配的交易
  142. if operation == '开仓':
  143. # 如果是开仓,查找平仓交易(时间在开仓之后)
  144. matching_trades = unpaired_trades[
  145. (unpaired_trades['仓位操作'] == '平仓') &
  146. (unpaired_trades['交易时间'] >= trade_time)
  147. ].sort_values('交易时间')
  148. else: # operation == '平仓'
  149. # 如果是平仓,查找开仓交易(时间在平仓之前)
  150. matching_trades = unpaired_trades[
  151. (unpaired_trades['仓位操作'] == '开仓') &
  152. (unpaired_trades['交易时间'] <= trade_time)
  153. ].sort_values('交易时间', ascending=False) # 从后往前,优先匹配最近的
  154. if len(matching_trades) == 0:
  155. continue
  156. # 使用贪心算法累加匹配的交易
  157. remaining_qty = trade_qty
  158. matched_indices = []
  159. for idx in matching_trades.index:
  160. match_qty = df.loc[idx, '成交数量_数值']
  161. if remaining_qty <= 0:
  162. break
  163. if match_qty <= remaining_qty:
  164. # 这笔交易可以加入配对
  165. matched_indices.append(idx)
  166. remaining_qty -= match_qty
  167. else:
  168. # 交易数量大于剩余需要量,跳过(保持精确匹配)
  169. continue
  170. # 如果找到精确匹配的组合(剩余数量为0或接近0)
  171. if len(matched_indices) > 0 and abs(remaining_qty) < 0.01:
  172. # 将匹配的交易添加到对应的配对ID中
  173. for idx in matched_indices:
  174. df.loc[idx, '交易对ID'] = pair_id
  175. fixed_count += len(matched_indices)
  176. if operation == '开仓':
  177. print(f" 修复: {target} {direction} 1笔开仓({trade_qty:.0f}手) 匹配 {len(matched_indices)}笔平仓 -> {pair_id}")
  178. else:
  179. print(f" 修复: {target} {direction} 1笔平仓({trade_qty:.0f}手) 匹配 {len(matched_indices)}笔开仓 -> {pair_id}")
  180. if fixed_count == 0:
  181. # 没有修复任何配对,退出循环
  182. print(f" 迭代 {iteration}: 无法修复更多配对,停止")
  183. break
  184. print(f" 迭代 {iteration}: 修复了 {fixed_count} 笔交易")
  185. # 最终检查:统计不完整配对
  186. paired_mask = (df['交易对ID'] != '') & (df['交易对ID'] != '未配对')
  187. paired_df = df[paired_mask]
  188. if len(paired_df) > 0:
  189. pair_id_counts = paired_df['交易对ID'].value_counts()
  190. incomplete_count = len(pair_id_counts[pair_id_counts == 1])
  191. if incomplete_count > 0:
  192. print(f"\n警告: 仍有 {incomplete_count} 个不完整配对无法修复")
  193. else:
  194. print(f"\n修复完成: 所有配对都已完整")
  195. return df
  196. def pair_transactions(df):
  197. """
  198. 为交易进行配对,分配交易对ID
  199. 参数:
  200. df (pandas.DataFrame): 解析后的交易数据
  201. 返回:
  202. pandas.DataFrame: 添加了交易对ID的DataFrame
  203. """
  204. df = df.copy()
  205. df['交易对ID'] = '' # 初始化交易对ID列
  206. # 按交易时间排序
  207. df = df.sort_values('交易时间').reset_index(drop=True)
  208. pair_id_counter = 1 # 交易对ID计数器
  209. paired_count = 0 # 已配对交易数
  210. unpaired_count = 0 # 未配对交易数
  211. # 按标的分组
  212. grouped = df.groupby('标的_完整')
  213. print(f"\n开始配对,共有 {len(grouped)} 个不同标的")
  214. for target, group in grouped:
  215. # 再按方向分组(多/空)
  216. for direction in ['多', '空']:
  217. direction_group = group[group['方向'] == direction].copy()
  218. if len(direction_group) == 0:
  219. continue
  220. # 分离开仓和平仓交易
  221. open_trades = direction_group[direction_group['仓位操作'] == '开仓'].copy()
  222. close_trades = direction_group[direction_group['仓位操作'] == '平仓'].copy()
  223. if len(open_trades) == 0 or len(close_trades) == 0:
  224. # 标记未配对的交易
  225. for idx in direction_group.index:
  226. df.loc[idx, '交易对ID'] = f'未配对'
  227. unpaired_count += 1
  228. continue
  229. # 第零阶段:优先处理同一时间的交易(特别是数量相等的1开1平)
  230. # 按时间分组,处理同一时间点的开仓和平仓
  231. time_groups = direction_group.groupby('交易时间')
  232. for time_key, time_group in time_groups:
  233. time_open_trades = time_group[time_group['仓位操作'] == '开仓'].copy()
  234. time_close_trades = time_group[time_group['仓位操作'] == '平仓'].copy()
  235. if len(time_open_trades) == 0 or len(time_close_trades) == 0:
  236. continue
  237. # 优先匹配数量完全相等的1开1平
  238. for close_idx in time_close_trades.index:
  239. if df.loc[close_idx, '交易对ID'] != '': # 已配对的平仓交易跳过
  240. continue
  241. close_qty = df.loc[close_idx, '成交数量_数值']
  242. # 查找同一时间、数量相等的未配对开仓
  243. matching_open = time_open_trades[
  244. (time_open_trades['成交数量_数值'] == close_qty) &
  245. (df.loc[time_open_trades.index, '交易对ID'] == '')
  246. ]
  247. if len(matching_open) > 0:
  248. # 找到匹配的开仓,优先使用第一个
  249. open_idx = matching_open.index[0]
  250. pair_id = f'P{pair_id_counter:04d}'
  251. df.loc[open_idx, '交易对ID'] = pair_id
  252. df.loc[close_idx, '交易对ID'] = pair_id
  253. paired_count += 2
  254. pair_id_counter += 1
  255. print(f" 同时间1开1平匹配: {target} {direction} {close_qty:.0f}手 -> {pair_id}")
  256. # 第一阶段:多开1平匹配
  257. # 遍历每笔平仓交易,查找可以合并匹配的多笔开仓
  258. for close_idx in close_trades.index:
  259. if df.loc[close_idx, '交易对ID'] != '': # 已配对的平仓交易跳过
  260. continue
  261. close_time = df.loc[close_idx, '交易时间']
  262. close_qty = df.loc[close_idx, '成交数量_数值']
  263. # 查找该平仓交易之前或同一时间的所有未配对开仓交易
  264. # 注意:同一时间的1开1平已经在第零阶段处理,这里主要处理多开1平的情况
  265. valid_open_trades = open_trades[
  266. (open_trades['交易时间'] <= close_time) &
  267. (df.loc[open_trades.index, '交易对ID'] == '') # 未被配对的开仓交易
  268. ].copy()
  269. if len(valid_open_trades) == 0:
  270. continue
  271. # 尝试找到开仓数量之和等于平仓数量的组合
  272. # 使用贪心算法:按时间顺序累加开仓数量
  273. remaining_qty = close_qty
  274. paired_open_indices = []
  275. for open_idx in valid_open_trades.index:
  276. open_qty = df.loc[open_idx, '成交数量_数值']
  277. if remaining_qty <= 0:
  278. break
  279. if open_qty <= remaining_qty:
  280. # 这笔开仓可以加入配对
  281. paired_open_indices.append(open_idx)
  282. remaining_qty -= open_qty
  283. else:
  284. # 开仓数量大于剩余需要量,跳过(保持精确匹配)
  285. continue
  286. # 如果找到精确匹配的组合(剩余数量为0或接近0)
  287. if len(paired_open_indices) > 0 and abs(remaining_qty) < 0.01:
  288. # 为配对的交易分配相同的ID
  289. pair_id = f'P{pair_id_counter:04d}'
  290. for open_idx in paired_open_indices:
  291. df.loc[open_idx, '交易对ID'] = pair_id
  292. df.loc[close_idx, '交易对ID'] = pair_id
  293. paired_count += len(paired_open_indices) + 1 # 开仓+平仓
  294. pair_id_counter += 1
  295. if len(paired_open_indices) > 1:
  296. print(f" 多开1平匹配: {target} {direction} {len(paired_open_indices)}笔开仓({sum([df.loc[idx, '成交数量_数值'] for idx in paired_open_indices]):.0f}手) 匹配 1笔平仓({close_qty:.0f}手) -> {pair_id}")
  297. # 第二阶段:1开多平匹配(原有逻辑)
  298. # 遍历每笔开仓交易,寻找匹配的平仓交易
  299. for open_idx in open_trades.index:
  300. # 跳过已配对的开仓交易
  301. if df.loc[open_idx, '交易对ID'] != '':
  302. continue
  303. open_time = df.loc[open_idx, '交易时间']
  304. open_qty = df.loc[open_idx, '成交数量_数值']
  305. # 查找该开仓交易之后的平仓交易
  306. valid_close_trades = close_trades[
  307. (close_trades['交易时间'] >= open_time) &
  308. (close_trades['交易对ID'] == '') # 未被配对的平仓交易
  309. ].copy()
  310. if len(valid_close_trades) == 0:
  311. # 没有找到匹配的平仓交易
  312. df.loc[open_idx, '交易对ID'] = '未配对'
  313. unpaired_count += 1
  314. continue
  315. # 累计平仓数量,直到等于开仓数量
  316. remaining_qty = open_qty
  317. paired_close_indices = []
  318. for close_idx in valid_close_trades.index:
  319. close_qty_val = df.loc[close_idx, '成交数量_数值']
  320. if remaining_qty <= 0:
  321. break
  322. if close_qty_val <= remaining_qty:
  323. # 这笔平仓完全匹配
  324. paired_close_indices.append(close_idx)
  325. remaining_qty -= close_qty_val
  326. elif close_qty_val > remaining_qty:
  327. # 这笔平仓数量大于剩余需要量,跳过(保持精确匹配)
  328. # 只有在剩余数量很小时才允许部分匹配
  329. continue
  330. # 为配对的交易分配相同的ID(要求精确匹配或接近精确匹配)
  331. if len(paired_close_indices) > 0 and abs(remaining_qty) < 0.01:
  332. pair_id = f'P{pair_id_counter:04d}'
  333. df.loc[open_idx, '交易对ID'] = pair_id
  334. for close_idx in paired_close_indices:
  335. df.loc[close_idx, '交易对ID'] = pair_id
  336. paired_count += len(paired_close_indices) + 1 # 开仓+平仓
  337. pair_id_counter += 1
  338. if len(paired_close_indices) > 1:
  339. print(f" 1开多平匹配: {target} {direction} 1笔开仓({open_qty:.0f}手) 匹配 {len(paired_close_indices)}笔平仓({sum([df.loc[idx, '成交数量_数值'] for idx in paired_close_indices]):.0f}手) -> {pair_id}")
  340. elif len(paired_close_indices) > 0:
  341. # 部分匹配,发出警告但不配对
  342. print(f" 警告: {target} {direction} 开仓在 {open_time} 有 {remaining_qty:.2f} 未配对,跳过配对")
  343. df.loc[open_idx, '交易对ID'] = '未配对'
  344. unpaired_count += 1
  345. else:
  346. # 没有配对成功
  347. df.loc[open_idx, '交易对ID'] = '未配对'
  348. unpaired_count += 1
  349. # 统计信息
  350. print(f"\n配对完成:")
  351. print(f" 已配对交易: {paired_count} 条")
  352. print(f" 未配对交易: {unpaired_count} 条")
  353. print(f" 生成交易对: {pair_id_counter - 1} 对")
  354. # 后处理:修复不完整配对
  355. df = fix_incomplete_pairs(df)
  356. return df
  357. def extract_symbol_core(symbol):
  358. """
  359. 从标的字符串中提取标的核心字母
  360. 参数:
  361. symbol (str): 标的字符串,如"10年期国债期货(T2006.CCFX)"
  362. 返回:
  363. tuple: (括号内完整代码, 标的核心字母)
  364. """
  365. try:
  366. # 提取括号内的内容
  367. match = re.search(r'\(([^)]+)\)', symbol)
  368. if match:
  369. full_code = match.group(1)
  370. # 去掉后面的9位获取标的核心字母
  371. core_symbol = full_code[:-9] if len(full_code) > 9 else full_code
  372. return full_code, core_symbol
  373. else:
  374. return symbol, symbol
  375. except:
  376. return symbol, symbol
  377. def identify_continuous_trade_pairs(df):
  378. """
  379. 识别连续交易对
  380. 参数:
  381. df (pandas.DataFrame): 包含交易对ID的交易数据
  382. 返回:
  383. pandas.DataFrame: 添加了连续交易对ID的DataFrame
  384. """
  385. print("\n开始识别连续交易对...")
  386. df = df.copy()
  387. df['连续交易对ID'] = 'N/A' # 初始化连续交易对ID列
  388. # 提取标的核心字母
  389. df['标的核心字母'] = df['标的'].apply(lambda x: extract_symbol_core(x)[1])
  390. # 获取所有已配对的交易对ID
  391. paired_mask = df['交易对ID'].str.startswith('P', na=False)
  392. paired_df = df[paired_mask].copy()
  393. if len(paired_df) == 0:
  394. print("没有已配对的交易")
  395. return df
  396. # 按交易对ID分组
  397. pair_groups = paired_df.groupby('交易对ID')
  398. # 存储连续交易对关系
  399. continuous_groups = [] # 每个元素是一组连续的交易对ID
  400. processed_pairs = set() # 已处理的交易对ID
  401. for pair_id, group in pair_groups:
  402. if pair_id in processed_pairs:
  403. continue
  404. # 获取当前交易对的开仓和平仓记录
  405. open_trades = group[group['仓位操作'] == '开仓'].copy()
  406. close_trades = group[group['仓位操作'] == '平仓'].copy()
  407. if len(open_trades) == 0 or len(close_trades) == 0:
  408. continue
  409. # 获取当前交易对的关键信息
  410. current_core_symbol = group['标的核心字母'].iloc[0]
  411. current_direction = group['方向'].iloc[0]
  412. current_close_date = close_trades['日期'].iloc[0]
  413. current_close_time = close_trades['委托时间'].iloc[0]
  414. current_close_qty = close_trades['成交数量_数值'].sum()
  415. # 查找匹配的连续交易对
  416. matching_pairs = []
  417. for other_pair_id, other_group in pair_groups:
  418. if other_pair_id == pair_id or other_pair_id in processed_pairs:
  419. continue
  420. # 获取另一个交易对的开仓和平仓记录
  421. other_open_trades = other_group[other_group['仓位操作'] == '开仓'].copy()
  422. other_close_trades = other_group[other_group['仓位操作'] == '平仓'].copy()
  423. if len(other_open_trades) == 0 or len(other_close_trades) == 0:
  424. continue
  425. # 检查条件1:平仓和开仓的日期、委托时间完全一致
  426. other_open_date = other_open_trades['日期'].iloc[0]
  427. other_open_time = other_open_trades['委托时间'].iloc[0]
  428. # 由于可能有多个开仓,检查是否有至少一个与平仓时间完全一致
  429. time_match_found = False
  430. for _, open_trade in other_open_trades.iterrows():
  431. if (open_trade['日期'] == current_close_date and
  432. open_trade['委托时间'] == current_close_time):
  433. time_match_found = True
  434. break
  435. if not time_match_found:
  436. continue
  437. # 检查条件2:交易类型匹配(平多对应开多,平空对应开空)
  438. other_direction = other_group['方向'].iloc[0]
  439. if current_direction != other_direction:
  440. continue
  441. # 检查条件3:标的核心字母一致
  442. other_core_symbol = other_group['标的核心字母'].iloc[0]
  443. if current_core_symbol != other_core_symbol:
  444. continue
  445. # 检查条件4:成交数量绝对值一致
  446. other_open_qty = other_open_trades['成交数量_数值'].sum()
  447. if abs(current_close_qty - other_open_qty) > 0.01:
  448. continue
  449. # 所有条件都满足,这是一个连续交易对
  450. matching_pairs.append(other_pair_id)
  451. processed_pairs.add(other_pair_id)
  452. # 如果找到匹配的连续交易对
  453. if matching_pairs:
  454. # 创建连续交易对组(包含当前交易对和所有匹配的交易对)
  455. continuous_group = [pair_id] + matching_pairs
  456. continuous_groups.append(continuous_group)
  457. processed_pairs.add(pair_id)
  458. print(f" 发现连续交易对组: {continuous_group}")
  459. print(f" 核心标的: {current_core_symbol}, 方向: {current_direction}")
  460. print(f" 平仓时间: {current_close_date} {current_close_time}")
  461. print(f" 平仓数量: {current_close_qty:.2f}, 开仓数量: {other_open_qty:.2f}")
  462. # 为连续交易对分配ID
  463. for i, continuous_group in enumerate(continuous_groups):
  464. continuous_id = f'C{i+1:04d}'
  465. for pair_id in continuous_group:
  466. mask = df['交易对ID'] == pair_id
  467. df.loc[mask, '连续交易对ID'] = continuous_id
  468. print(f"\n识别完成,共发现 {len(continuous_groups)} 组连续交易对")
  469. # 清理临时列
  470. df = df.drop('标的核心字母', axis=1)
  471. return df
  472. def save_result(df, output_path):
  473. """
  474. 保存配对结果到CSV文件
  475. 参数:
  476. df (pandas.DataFrame): 包含交易对ID的DataFrame
  477. output_path (str): 输出文件路径
  478. """
  479. df = df.copy()
  480. # 添加"开仓时间"列
  481. # 对于每个交易对ID,找到对应的开仓记录的"最后更新时间"
  482. df['开仓时间'] = ''
  483. # 添加"交易盈亏"列,根据相同的交易对ID对平仓盈亏进行求和
  484. df['交易盈亏'] = ''
  485. # 添加"连续交易总盈亏"列
  486. df['连续交易总盈亏'] = 'N/A'
  487. # 先计算每个交易对的盈亏
  488. for pair_id in df['交易对ID'].unique():
  489. if pair_id and pair_id.startswith('P'):
  490. # 找到该交易对的所有记录
  491. pair_mask = df['交易对ID'] == pair_id
  492. pair_records = df[pair_mask]
  493. # 找到开仓记录(仓位操作为"开仓")
  494. open_record = pair_records[pair_records['仓位操作'] == '开仓']
  495. if len(open_record) > 0:
  496. # 获取开仓记录的最后更新时间
  497. open_time = open_record.iloc[0]['最后更新时间']
  498. # 将开仓时间填充到该交易对的所有记录中
  499. df.loc[pair_mask, '开仓时间'] = open_time
  500. # 计算该交易对的总盈亏(对平仓盈亏求和)
  501. try:
  502. # 提取平仓盈亏列,转换为数值
  503. # 先转换为字符串,去掉千位分隔符(逗号),然后转换为数值
  504. close_profit_loss_str = pair_records['平仓盈亏'].astype(str).str.replace(',', '')
  505. # 尝试转换为数值,无法转换的设为0
  506. close_profit_loss_numeric = pd.to_numeric(close_profit_loss_str, errors='coerce').fillna(0)
  507. total_profit_loss = close_profit_loss_numeric.sum()
  508. # 将总盈亏填充到该交易对的所有记录中
  509. df.loc[pair_mask, '交易盈亏'] = total_profit_loss
  510. except Exception as e:
  511. # 如果计算失败,设为0
  512. df.loc[pair_mask, '交易盈亏'] = 0
  513. # 计算连续交易总盈亏
  514. for continuous_id in df['连续交易对ID'].unique():
  515. if continuous_id != 'N/A' and pd.notna(continuous_id):
  516. # 找到该连续交易组的所有记录
  517. continuous_mask = df['连续交易对ID'] == continuous_id
  518. continuous_records = df[continuous_mask]
  519. # 计算该连续交易组的总盈亏
  520. try:
  521. # 提取平仓盈亏列,转换为数值
  522. close_profit_loss_str = continuous_records['平仓盈亏'].astype(str).str.replace(',', '')
  523. # 尝试转换为数值,无法转换的设为0
  524. close_profit_loss_numeric = pd.to_numeric(close_profit_loss_str, errors='coerce').fillna(0)
  525. total_continuous_profit = close_profit_loss_numeric.sum()
  526. # 将连续交易总盈亏填充到该组的所有记录中
  527. df.loc[continuous_mask, '连续交易总盈亏'] = total_continuous_profit
  528. except Exception as e:
  529. # 如果计算失败,设为0
  530. df.loc[continuous_mask, '连续交易总盈亏'] = 0
  531. # 移除中间处理列
  532. columns_to_remove = ['标的_完整', '交易类型_标准', '仓位操作', '方向', '成交数量_数值', '交易时间']
  533. output_columns = [col for col in df.columns if col not in columns_to_remove]
  534. # 调整列顺序,确保交易对ID、连续交易对ID、开仓时间、交易盈亏和连续交易总盈亏在最后
  535. if '交易对ID' in output_columns:
  536. output_columns.remove('交易对ID')
  537. if '连续交易对ID' in output_columns:
  538. output_columns.remove('连续交易对ID')
  539. if '开仓时间' in output_columns:
  540. output_columns.remove('开仓时间')
  541. if '交易盈亏' in output_columns:
  542. output_columns.remove('交易盈亏')
  543. if '连续交易总盈亏' in output_columns:
  544. output_columns.remove('连续交易总盈亏')
  545. output_columns.append('交易对ID')
  546. output_columns.append('连续交易对ID')
  547. output_columns.append('开仓时间')
  548. output_columns.append('交易盈亏')
  549. output_columns.append('连续交易总盈亏')
  550. # 按交易对ID和日期升序排序
  551. # 创建排序辅助列:未配对的排在最后,其他按ID数字排序
  552. def get_sort_key(pair_id):
  553. if pd.isna(pair_id) or pair_id == '' or pair_id == '未配对':
  554. return (1, '') # 未配对排在最后
  555. elif isinstance(pair_id, str) and pair_id.startswith('P'):
  556. try:
  557. # 提取数字部分用于排序
  558. num = int(pair_id[1:])
  559. return (0, num) # 已配对的排在前面,按数字排序
  560. except:
  561. return (1, pair_id)
  562. else:
  563. return (1, str(pair_id))
  564. df['_sort_key_id'] = df['交易对ID'].apply(get_sort_key)
  565. # 确保日期列可以排序(转换为datetime类型)
  566. if '日期' in df.columns:
  567. df['_sort_date'] = pd.to_datetime(df['日期'], errors='coerce')
  568. else:
  569. df['_sort_date'] = pd.NaT
  570. # 先按交易对ID排序,再按日期排序
  571. df = df.sort_values(['_sort_key_id', '_sort_date'], ascending=[True, True]).reset_index(drop=True)
  572. df = df.drop(['_sort_key_id', '_sort_date'], axis=1)
  573. # 保存到CSV
  574. try:
  575. df[output_columns].to_csv(output_path, index=False, encoding='utf-8-sig')
  576. print(f"\n结果已保存到: {output_path}")
  577. # 获取文件大小
  578. file_size = os.path.getsize(output_path) / 1024 # KB
  579. print(f"文件大小: {file_size:.2f} KB")
  580. except Exception as e:
  581. print(f"保存文件时出错: {str(e)}")
  582. def print_statistics(df):
  583. """
  584. 打印配对统计信息
  585. 参数:
  586. df (pandas.DataFrame): 包含交易对ID的DataFrame
  587. """
  588. print("\n" + "=" * 60)
  589. print("配对统计信息")
  590. print("=" * 60)
  591. # 统计已配对和未配对
  592. paired = df[df['交易对ID'].str.startswith('P', na=False)]
  593. unpaired = df[df['交易对ID'] == '未配对']
  594. print(f"\n总交易记录: {len(df)} 条")
  595. print(f"已配对交易: {len(paired)} 条 ({len(paired)/len(df)*100:.1f}%)")
  596. print(f"未配对交易: {len(unpaired)} 条 ({len(unpaired)/len(df)*100:.1f}%)")
  597. # 统计交易对数量
  598. unique_pairs = paired['交易对ID'].nunique()
  599. print(f"\n交易对数量: {unique_pairs} 对")
  600. # 统计连续交易对
  601. if '连续交易对ID' in df.columns:
  602. continuous_pairs = df[df['连续交易对ID'] != 'N/A']
  603. unique_continuous_pairs = continuous_pairs['连续交易对ID'].nunique()
  604. print(f"\n连续交易对统计:")
  605. print(f" 连续交易对数量: {unique_continuous_pairs} 组")
  606. print(f" 涉及交易记录: {len(continuous_pairs)} 条")
  607. if len(continuous_pairs) > 0:
  608. # 统计每组连续交易对的交易对数量
  609. continuous_stats = continuous_pairs.groupby('连续交易对ID')['交易对ID'].nunique()
  610. print(f" 每组连续交易对包含的交易对数量分布:")
  611. for continuous_id, pair_count in continuous_stats.items():
  612. print(f" {continuous_id}: {pair_count} 个交易对")
  613. # 统计每个交易对的平仓次数分布
  614. if len(paired) > 0:
  615. pair_counts = paired.groupby('交易对ID').size()
  616. print(f"\n交易对组成分布:")
  617. distribution = pair_counts.value_counts().sort_index()
  618. for count, freq in distribution.items():
  619. if count == 2:
  620. print(f" 1开1平: {freq} 对")
  621. else:
  622. print(f" 1开{count-1}平: {freq} 对")
  623. # 按标的统计
  624. print(f"\n按标的统计:")
  625. target_stats = df.groupby('标的_完整')['交易对ID'].apply(
  626. lambda x: f"总:{len(x)}条, 已配对:{len(x[x.str.startswith('P', na=False)])}条"
  627. )
  628. for target, stats in target_stats.items():
  629. print(f" {target}: {stats}")
  630. def analyze_transaction_pairs(csv_filename=None, output_filename=None):
  631. """
  632. 主函数:分析交易配对
  633. 参数:
  634. csv_filename (str): 输入CSV文件名
  635. output_filename (str): 输出CSV文件名(可选)
  636. """
  637. print("=" * 60)
  638. print("交易配对分析工具")
  639. print("=" * 60)
  640. # 设置文件路径
  641. if csv_filename is None:
  642. csv_filename = 'transaction.csv'
  643. current_dir = _get_current_directory()
  644. csv_path = os.path.join(current_dir, csv_filename)
  645. if not os.path.exists(csv_path):
  646. print(f"错误: 文件不存在 - {csv_path}")
  647. return
  648. # 设置输出文件名
  649. if output_filename is None:
  650. timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
  651. base_name = os.path.splitext(csv_filename)[0]
  652. output_filename = f"{base_name}_paired_{timestamp}.csv"
  653. output_path = os.path.join(current_dir, output_filename)
  654. # 步骤1: 读取CSV
  655. print(f"\n步骤1: 读取CSV文件")
  656. print(f"文件路径: {csv_path}")
  657. df = read_transaction_csv(csv_path)
  658. if len(df) == 0:
  659. print("错误: 无法读取数据")
  660. return
  661. # 步骤2: 解析数据
  662. print(f"\n步骤2: 解析交易数据")
  663. df = parse_transaction_data(df)
  664. if len(df) == 0:
  665. print("错误: 没有有效的交易记录")
  666. return
  667. # 步骤3: 配对交易
  668. print(f"\n步骤3: 配对交易")
  669. df = pair_transactions(df)
  670. # 步骤4: 识别连续交易对
  671. print(f"\n步骤4: 识别连续交易对")
  672. df = identify_continuous_trade_pairs(df)
  673. # 步骤5: 保存结果
  674. print(f"\n步骤5: 保存结果")
  675. save_result(df, output_path)
  676. # 步骤6: 打印统计信息
  677. print_statistics(df)
  678. print("\n" + "=" * 60)
  679. print("分析完成")
  680. print("=" * 60)
  681. # 使用示例
  682. if __name__ == "__main__":
  683. # 可以指定CSV文件名,如果不指定则使用默认的 transaction.csv
  684. analyze_transaction_pairs()
  685. # 或者指定特定文件: analyze_transaction_pairs('transaction.csv')