234 lines
8.7 KiB
Python
234 lines
8.7 KiB
Python
|
# Install the required libraries first: pip install tqdm requests
|
|||
|
import os
|
|||
|
import json
|
|||
|
import threading
|
|||
|
import requests
|
|||
|
from tqdm import tqdm
|
|||
|
|
|||
|
|
|||
|
def fetch_lesson_data(page_num=1, page_size=10, search_keyword="", timeout=30):
    """Fetch one page of on-demand lessons from the cloud-classroom API.

    Sends a POST request with a JSON body to the ``getOnDemandLessonPage``
    endpoint. The book, edition, stage and subject filters are hard-coded in
    the payload below; only paging and the search keyword are configurable.

    Args:
        page_num: Page number to request (defaults to 1).
        page_size: Number of records per page (defaults to 10).
        search_keyword: Search keyword; empty string by default.
        timeout: Value passed to ``requests`` as the request timeout, in
            seconds, so a stalled server cannot hang the caller forever.

    Returns:
        The decoded JSON response when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, network error, or a body that is
        not valid JSON). Failures are reported on stdout.
    """
    url = "https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage"

    payload = {
        "pageNum": page_num,
        "pageSize": page_size,
        "businessBookId": "3029115d4afa424f8160adb04bd10e6a",
        "businessEditionId": "19F93E4A7C9B4B589EB001FBFEE6230A",
        "excellentFlag": "",
        "nodeId": None,
        "nodeType": 1,
        "stageCode": "3",
        "subjectCode": "314",
        "sortType": 2,
        "source": "",
        "searchKeyword": search_keyword,
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"请求失败,状态码: {response.status_code}")
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout failures; ValueError
        # covers a 200 response whose body cannot be decoded as JSON.
        print(f"请求异常: {str(e)}")
        return None
|
|||
|
|
|||
|
|
|||
|
# Multi-threaded downloader class
|
|||
|
class MultiThreadDownloader:
    """Download a URL to a local file, splitting the work across threads.

    The file size is read from a HEAD request and divided into one byte range
    per thread. Each range is fetched with an HTTP ``Range`` request into a
    ``<file_path>.part<N>`` file, and the parts are concatenated afterwards.
    When the size is unknown, the file is too small to split, or any ranged
    request fails, the download falls back to a single streamed GET.
    """

    # Bytes requested from the response stream per iteration.
    CHUNK_SIZE = 64 * 1024
    # Bytes copied per read while concatenating part files.
    COPY_BLOCK_SIZE = 1024 * 1024

    def __init__(self, url, file_path, num_threads=4, timeout=30):
        """Store the download settings; no network or disk access happens here.

        Args:
            url: Source URL.
            file_path: Destination path of the finished file.
            num_threads: Number of byte ranges / worker threads to use.
            timeout: Value passed to ``requests`` as the request timeout.
        """
        self.url = url
        self.file_path = file_path
        self.num_threads = num_threads
        self.timeout = timeout
        self.total_size = 0
        self.progress = None
        # Exceptions raised by worker threads; checked by start() so that a
        # failed range is never merged into a corrupt output file.
        self._errors = []
        self._errors_lock = threading.Lock()

    def get_file_size(self):
        """Read the file size from a HEAD request into ``self.total_size``.

        Returns:
            True when a usable ``content-length`` header was received, False
            when the request failed or the header is missing or not a number.
        """
        try:
            response = requests.head(
                self.url, allow_redirects=True, timeout=self.timeout
            )
        except requests.RequestException:
            return False
        if 'content-length' not in response.headers:
            return False
        try:
            self.total_size = int(response.headers['content-length'])
        except ValueError:
            return False
        return True

    def download_chunk(self, start, end, thread_id):
        """Fetch bytes ``start``..``end`` (inclusive) into a part file.

        Runs as a thread target, so failures are recorded in ``self._errors``
        instead of being raised.

        Args:
            start: First byte offset of the range.
            end: Last byte offset of the range (inclusive).
            thread_id: Index used to name the ``.part<thread_id>`` file.
        """
        part_path = f'{self.file_path}.part{thread_id}'
        try:
            headers = {'Range': f'bytes={start}-{end}'}
            with requests.get(
                self.url, headers=headers, stream=True, timeout=self.timeout
            ) as response:
                # Anything but 206 means the server did not honour the Range
                # header; merging such responses would corrupt the file.
                if response.status_code != 206:
                    raise OSError(
                        f"expected HTTP 206 for range {start}-{end}, "
                        f"got {response.status_code}"
                    )
                with open(part_path, 'wb') as f:
                    for data in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        f.write(data)
                        if self.progress is not None:
                            self.progress.update(len(data))
            expected = end - start + 1
            written = os.path.getsize(part_path)
            if written != expected:
                raise OSError(
                    f"range {start}-{end}: wrote {written} bytes, "
                    f"expected {expected}"
                )
        except Exception as e:  # thread boundary: record, never lose the error
            with self._errors_lock:
                self._errors.append(e)

    def merge_chunks(self):
        """Concatenate the existing part files, in index order, into ``file_path``.

        Each part file is deleted once it has been copied. Missing part files
        are skipped.
        """
        with open(self.file_path, 'wb') as f:
            for i in range(self.num_threads):
                part_file = f'{self.file_path}.part{i}'
                if not os.path.exists(part_file):
                    continue
                with open(part_file, 'rb') as pf:
                    # Copy in blocks so a large part is never held in memory.
                    while True:
                        block = pf.read(self.COPY_BLOCK_SIZE)
                        if not block:
                            break
                        f.write(block)
                os.remove(part_file)

    def start(self):
        """Run the download.

        Returns:
            True when the file was downloaded, False when the single-threaded
            fallback failed as well.
        """
        # dirname is '' for a bare file name, and os.makedirs('') raises.
        parent_dir = os.path.dirname(self.file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if not self.get_file_size():
            print(f"无法获取文件大小,使用单线程下载: {self.url}")
            return self._download_single()

        # Fewer bytes than threads would yield empty or invalid byte ranges.
        if self.num_threads < 1 or self.total_size < self.num_threads:
            return self._download_single()

        if self._download_multi():
            return True

        print(f"多线程下载失败,改用单线程下载: {self.url}")
        return self._download_single()

    def _split_ranges(self):
        """Return one inclusive ``(start, end)`` byte range per thread.

        The last range absorbs the remainder of the integer division.
        """
        chunk_size = self.total_size // self.num_threads
        last = self.num_threads - 1
        return [
            (
                i * chunk_size,
                self.total_size - 1 if i == last else (i + 1) * chunk_size - 1,
            )
            for i in range(self.num_threads)
        ]

    def _download_multi(self):
        """Download all ranges in parallel and merge them; True on success."""
        self._errors = []
        self.progress = tqdm(
            total=self.total_size,
            unit='B',
            unit_scale=True,
            desc=os.path.basename(self.file_path),
        )
        threads = []
        try:
            for i, (start, end) in enumerate(self._split_ranges()):
                thread = threading.Thread(
                    target=self.download_chunk, args=(start, end, i)
                )
                threads.append(thread)
                thread.start()
            for thread in threads:
                thread.join()
        finally:
            self.progress.close()
            self.progress = None

        if self._errors:
            print(f"分块下载出错: {self._errors[0]}")
            for i in range(self.num_threads):
                self._remove_if_exists(f'{self.file_path}.part{i}')
            return False

        self.merge_chunks()
        return True

    def _download_single(self):
        """Stream the whole file with one GET request; True on success.

        Data is written to ``<file_path>.download`` and renamed only after the
        transfer completes, so an interrupted download never leaves a file at
        ``file_path`` that looks finished.
        """
        temp_path = f'{self.file_path}.download'
        try:
            with requests.get(
                self.url, stream=True, timeout=self.timeout
            ) as response:
                response.raise_for_status()
                total_size = int(response.headers.get('content-length', 0))
                with open(temp_path, 'wb') as f, tqdm(
                    total=total_size,
                    unit='B',
                    unit_scale=True,
                    desc=os.path.basename(self.file_path),
                ) as pbar:
                    for data in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        f.write(data)
                        pbar.update(len(data))
            os.replace(temp_path, self.file_path)
            return True
        except (requests.RequestException, OSError, ValueError) as e:
            print(f"单线程下载失败: {str(e)}")
            self._remove_if_exists(temp_path)
            return False

    @staticmethod
    def _remove_if_exists(path):
        """Best-effort removal of a temporary file."""
        try:
            os.remove(path)
        except OSError:
            # Cleanup only: a missing or locked temp file must not mask the
            # download result being reported by the caller.
            pass
|
|||
|
|
|||
|
|
|||
|
# Script entry point: fetch the full lesson list and download every video
|
|||
|
# Base URL that the API's ``fileUrl`` values are appended to.
VIDEO_BASE_URL = "https://ccschool.edusoa.com/"


def extract_page_data(response):
    """Return the ``data`` dict of an API response, or None if it is absent.

    Guards against a failed request (``None``) and against responses whose
    ``data`` field is missing or not a dict.
    """
    if not isinstance(response, dict):
        return None
    data = response.get('data')
    return data if isinstance(data, dict) else None


def resolve_video_source(lesson):
    """Return ``(video_url, file_name)`` for a lesson, or ``(None, None)``.

    The first entry of ``lessonResources`` is assumed to be the video. The
    file name is the last path segment of its ``fileUrl`` with any query
    string or fragment removed.
    """
    resources = lesson.get('lessonResources') or []
    if not resources:
        return None, None
    file_url = resources[0].get('fileUrl')
    if not file_url:
        return None, None
    file_name = file_url.split('?', 1)[0].split('#', 1)[0].split('/')[-1]
    if not file_name:
        return None, None
    return VIDEO_BASE_URL + file_url, file_name


def main():
    """Fetch every lesson page, download each video and log the outcome.

    Videos are stored in a ``Down`` directory next to this script. A
    ``download_info.json`` file in the same directory records the status of
    each lesson; it is rewritten after every fifth lesson and after the last.
    """
    down_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Down")
    os.makedirs(down_dir, exist_ok=True)

    # A one-record request is enough to learn the total number of records.
    first_page = extract_page_data(fetch_lesson_data(page_num=1, page_size=1))
    if first_page is None:
        print("获取课程数据失败")
        return

    total = int(first_page.get('total') or 0)
    page_size = 10
    total_pages = (total + page_size - 1) // page_size
    print(f"获取成功,共{total}条记录")

    all_lessons = []
    for page_num in range(1, total_pages + 1):
        print(f"正在获取第{page_num}/{total_pages}页数据...")
        page = extract_page_data(
            fetch_lesson_data(page_num=page_num, page_size=page_size)
        )
        rows = page.get('rows') if page else None
        if rows:
            all_lessons.extend(rows)
        else:
            print(f"第{page_num}页数据获取失败")

    print(f"成功获取{len(all_lessons)}条课程数据")

    info_file = os.path.join(down_dir, "download_info.json")
    download_info = []
    success_count = 0
    fail_count = 0

    for idx, lesson in enumerate(all_lessons, 1):
        lesson_id = lesson.get('lessonId')
        lesson_name = lesson.get('lessonName')
        print(f"\n处理课程 {idx}/{len(all_lessons)}: {lesson_name}")

        video_url, file_name = resolve_video_source(lesson)
        if not video_url:
            print("未找到视频URL")
            file_path = ""
            status = "无视频URL"
            fail_count += 1
        else:
            file_path = os.path.join(down_dir, file_name)
            if os.path.exists(file_path):
                print(f"文件已存在,跳过下载: {file_path}")
                status = "已存在"
            else:
                print(f"开始下载视频: {video_url}")
                try:
                    downloaded = MultiThreadDownloader(video_url, file_path).start()
                except (requests.RequestException, OSError) as e:
                    # One broken lesson must not abort the remaining downloads.
                    print(f"下载异常: {str(e)}")
                    downloaded = False
                if downloaded:
                    print(f"视频下载成功: {file_path}")
                    status = "成功"
                    success_count += 1
                else:
                    print(f"视频下载失败: {video_url}")
                    status = "失败"
                    fail_count += 1

        download_info.append({
            "course_name": lesson_name,
            "course_id": lesson_id,
            "file_path": file_path,
            "status": status,
        })

        # Runs for every lesson, including skipped ones, so the final state
        # is always written even when the last lesson already exists on disk.
        if idx % 5 == 0 or idx == len(all_lessons):
            with open(info_file, 'w', encoding='utf-8') as f:
                json.dump(download_info, f, ensure_ascii=False, indent=2)
            print(f"已保存下载信息到: {info_file}")

    print(f"\n下载完成!成功: {success_count}, 失败: {fail_count}")


if __name__ == "__main__":
    main()
|