ai_member_xiaoxi/process_order.py
2026-03-25 08:00:01 +08:00

69 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
# Load table A (the reference table supplied by the user).
table_a = pd.read_excel('reference_order.xlsx')
# Rename the order-number column to the merge key used below; keyFrom becomes key_from_a.
table_a = table_a.rename(columns={'订单号': 'out_trade_no', 'keyFrom': 'key_from_a'})
# Keep only the fields needed below.
table_a = table_a[['out_trade_no', 'key_from_a', '成交标记']]
print(f"表A总订单数{len(table_a)},其中成交标记非空:{len(table_a[table_a['成交标记'].notna()])}")
# An order number that occurs more than once in table A would multiply the
# matching table B row in the left merge below (and inflate the match counts),
# so keep a single row per order, preferring a row whose 成交标记 is filled in.
has_tag = table_a['成交标记'].notna()
table_a = pd.concat([table_a[has_tag], table_a[~has_tag]]).drop_duplicates('out_trade_no')
# Load table B (orders exported from March 1st until now).
table_b = pd.read_csv('2026年3月1日至今订单.csv')
print(f"表B总订单数{len(table_b)}")
# Step 1: match the orders present in both tables. A left merge keeps every
# table B order; the _merge indicator records whether table A had it too.
merged = pd.merge(table_b, table_a, on='out_trade_no', how='left', indicator=True)
# Report how many orders matched.
match_stats = merged['_merge'].value_counts()
print(f"\n匹配结果:")
print(f" 两个表都有的订单:{match_stats.get('both', 0)}条 → 直接使用表A的成交标记")
print(f" 仅表B存在的新增订单{match_stats.get('left_only', 0)}条 → 按规则生成新标记")
# Step 2: prepare the tagging logic for the new orders.
# Learn a key_from -> 成交标记 mapping from the matched orders. Rows with a blank
# tag are dropped BEFORE de-duplicating on key_from; otherwise a key_from whose
# first matched order happens to have no tag would lose its mapping even though
# later orders carry one. The first tagged order per key_from wins.
matched_orders = merged[merged['_merge'] == 'both']
learned_map = (
    matched_orders.dropna(subset=['成交标记'])
    .drop_duplicates('key_from')
    .set_index('key_from')['成交标记']
    .to_dict()
)
print(f"\n从匹配的订单中学习到的key_from→成交标记映射{len(learned_map)}条):")
for k, v in learned_map.items():
    print(f" {k}{v}")
# Rule that produces the final deal tag for a single order row.
def get_final_tag(row):
    """Return the final deal tag (成交标记) for one row of the merged table.

    Resolution order:
      1. The row was matched in table A (``_merge == 'both'``) and its
         ``成交标记`` is not null: that tag is returned unchanged.
      2. ``key_from`` is a key of the module-level ``learned_map`` with a
         non-null value: the learned tag is returned.
      3. Prefix / exact-match rules on ``key_from``.
      4. ``'其他'`` when nothing else applies, including when ``key_from``
         is missing or not a string.

    Args:
        row: Mapping-like object (e.g. a row passed by ``DataFrame.apply``
            with ``axis=1``) providing ``'_merge'``, ``'成交标记'`` and
            ``'key_from'``.

    Returns:
        The tag to store for this order.
    """
    # Matched orders reuse the tag from table A directly.
    if row['_merge'] == 'both' and pd.notna(row['成交标记']):
        return row['成交标记']

    # New orders: prefer the mapping learned from matched orders.
    key_from = row['key_from']
    learned_tag = learned_map.get(key_from)
    if pd.notna(learned_tag):
        return learned_tag

    # A missing key_from (e.g. NaN from an empty CSV cell) is not a string;
    # calling .startswith on it would raise AttributeError, so such rows fall
    # through to the catch-all tag instead of aborting the whole run.
    if not isinstance(key_from, str):
        return '其他'

    # Rule-based matching on the key_from value.
    if key_from.startswith('newmedia-daren-'):
        return '达播'
    if key_from == 'app-active-h5-0-0':
        return '端内直购'
    if key_from.startswith(('sales-adp-', 'app-sales-')):
        return '销转'
    if key_from.startswith('newmedia-dianpu-'):
        return '店铺直购'
    return '其他'
# Compute the final deal tag for every row; tags recorded as 0 (numeric or
# the string '0') are relabelled as 店铺直购.
final_tags = merged.apply(get_final_tag, axis=1)
final_tags = final_tags.replace(0, '店铺直购').replace('0', '店铺直购')
merged['最终成交标记'] = final_tags
# Drop the helper columns and expose the final tag under the name 成交标记.
helper_columns = ['key_from_a', '_merge', '成交标记']
final_df = merged.drop(columns=helper_columns)
final_df = final_df.rename(columns={'最终成交标记': '成交标记'})
# Write the result as CSV (utf-8-sig encoding, no index column).
output_file = '2026年3月1日至今订单_含正确成交标记.csv'
final_df.to_csv(output_file, index=False, encoding='utf-8-sig')
print(f"\n处理完成,已生成最终文件:{output_file}")
# Show how many orders ended up with each tag.
print("最终成交标记分布:")
tag_distribution = final_df['成交标记'].value_counts()
print(tag_distribution)