#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ 双色球历史数据自动爬取 + 频率更新脚本 功能: 1. 从官方数据源抓取所有历史期数(含2026年最新期) 2. 统计红球1-33和蓝球1-16的累计频次 3. 更新HTML文件中的RED_FREQ和BLUE_FREQ频次表 使用方法: python update_ssq_freq.py """ import requests import json import re import os from collections import defaultdict # ==================== 配置 ==================== # 官方数据接口(聚合数据,每日更新) API_URL = "http://apis.juhe.cn/lottery/query" API_KEY = "" # 请注册聚合数据免费获取:https://www.juhe.cn/docs/api/id/300 # 备用数据源:500彩票网(JSONP接口,兼容性好) BACKUP_URL = "https://www.500.com/api/lottery/result?name=ssq&num=10000" # HTML模板文件路径(如果使用完整HTML文件) HTML_FILE = "lottery.html" # 输入原HTML文件 OUTPUT_FILE = "lottery_updated.html" # 输出更新后的HTML文件 # 是否需要使用API(自行申请API后设为True) USE_API = False # ==================== 方案1:爬取500彩票网(推荐,无需API)==================== def fetch_from_500(): """ 从500彩票网抓取双色球历史数据 数据格式示例: 2026001:01,02,03,04,05,06+07 """ print("📡 正在从500彩票网抓取数据...") headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } try: response = requests.get(BACKUP_URL, headers=headers, timeout=30) if response.status_code != 200: print(f"❌ 请求失败,状态码: {response.status_code}") return None # 500彩票网返回的是JSONP格式,需要提取JSON部分 text = response.text # 尝试提取JSON内容 json_match = re.search(r'\((\{.*\})\)', text) if json_match: data = json.loads(json_match.group(1)) else: data = response.json() # 解析开奖数据 results = data.get('data', []) if not results: print("⚠️ 未找到开奖数据,尝试备用解析方式...") # 备用解析:data可能是列表 if isinstance(data, list): results = data elif isinstance(data, dict) and 'list' in data: results = data['list'] red_freq = defaultdict(int) blue_freq = defaultdict(int) total = 0 for item in results: # 获取开奖号码 code = item.get('result', '') or item.get('number', '') or item.get('lottery_result', '') if not code: continue # 解析格式: "01,02,03,04,05,06+07" 或 "01 02 03 04 05 06 + 07" if '+' in code: red_part, blue_part = code.split('+') elif ' + ' in code: red_part, blue_part = code.split(' + ') else: continue # 提取红球号码(去掉非数字) red_numbers = re.findall(r'\d{1,2}', red_part) if len(red_numbers) != 6: continue # 提取蓝球号码 blue_match = re.search(r'\d{1,2}', blue_part) if not blue_match: continue blue_num = int(blue_match.group()) # 累加频率 for r in red_numbers: red_freq[int(r)] += 1 blue_freq[blue_num] += 1 total += 1 if total == 0: print("❌ 未解析到有效开奖数据") return None print(f"✅ 成功解析 {total} 期开奖数据") return {'red': red_freq, 'blue': blue_freq, 'total': total} except Exception as e: print(f"❌ 抓取失败: {e}") return None # ==================== 方案2:使用聚合数据API(需注册)==================== def fetch_from_juhe(api_key): """使用聚合数据API抓取历史数据""" print("📡 正在从聚合数据API抓取数据...") # 先获取最新期号 params = { 'key': api_key, 'lottery_id': 'ssq', 'lottery_no': '', 'num': 100 } try: response = requests.get(API_URL, params=params, timeout=30) if response.status_code != 200: print(f"❌ API请求失败: {response.status_code}") return None data = response.json() if data.get('error_code') != 0: print(f"❌ API返回错误: {data.get('reason', '未知错误')}") return None result = data.get('result', {}) lottery_res = result.get('lottery_res', '') # 解析: "01,02,03,04,05,06|07" if '|' in lottery_res: red_part, blue_part = lottery_res.split('|') reds = [int(x) for x in red_part.split(',') if x] blue = int(blue_part) else: return None # 聚合数据的单次调用只返回最新一期,需要额外处理多期查询 # 这里仅作为示例,实际使用需配置多页分页请求 print("⚠️ 聚合数据API单次只返回一期,完整抓取建议使用500彩票网数据源") return None except Exception as e: print(f"❌ API请求异常: {e}") return None # ==================== 更新HTML文件中的频次数据 ==================== def update_html_file(red_freq, blue_freq, total_issues): """ 更新HTML文件中的RED_FREQ和BLUE_FREQ频次表 """ if not os.path.exists(HTML_FILE): print(f"❌ 未找到 {HTML_FILE},请确保文件在当前目录下") print(" 创建新文件 lottery_updated.html 将基于模板生成") # 从模板创建新文件 create_template_html() with open(HTML_FILE, 'r', encoding='utf-8') as f: content = f.read() # 生成红球频次表代码 red_freq_str = "{\n" for i in range(1, 34): freq = red_freq.get(i, 450) comma = ',' if i < 33 else '' red_freq_str += f" {i}:{freq}{comma}\n" red_freq_str += " }" # 生成蓝球频次表代码 blue_freq_str = "{\n" for i in range(1, 17): freq = blue_freq.get(i, 200) comma = ',' if i < 16 else '' blue_freq_str += f" {i}:{freq}{comma}\n" blue_freq_str += " }" # 替换原频次表 # 匹配 RED_FREQ = { ... }; pattern_red = r'(const RED_FREQ = )\{[^}]*\}(;)' content = re.sub(pattern_red, r'\1' + red_freq_str + r'\2', content, flags=re.DOTALL) # 匹配 BLUE_FREQ = { ... }; pattern_blue = r'(const BLUE_FREQ = )\{[^}]*\}(;)' content = re.sub(pattern_blue, r'\1' + blue_freq_str + r'\2', content, flags=re.DOTALL) # 可选:添加更新日期注释 from datetime import datetime update_note = f' // 数据更新于: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | 共计{total_issues}期\n' # 写入新文件 with open(OUTPUT_FILE, 'w', encoding='utf-8') as f: f.write(content) print(f"✅ 已生成更新后的HTML文件: {OUTPUT_FILE}") print(f"📊 数据统计: {total_issues} 期 | 红球频次已更新 | 蓝球频次已更新") def create_template_html(): """如果原HTML文件不存在,创建一个基础模板""" template = '''