# Install the required libraries first: pip install tqdm requests
import json
import os
import shutil
import threading

import requests
from tqdm import tqdm

# (connect, read) timeout in seconds, so a stalled server cannot hang the
# script forever.
REQUEST_TIMEOUT = (10, 60)

# Bytes read from the network per iteration while streaming a download.
DOWNLOAD_CHUNK_SIZE = 64 * 1024

# Prefix prepended to each resource's relative ``fileUrl``.
VIDEO_BASE_URL = "https://ccschool.edusoa.com/"


def fetch_lesson_data(page_num=1, page_size=10, search_keyword=""):
    """Send a POST request to fetch one page of cloud-classroom lessons.

    Args:
        page_num: Page number, defaults to 1.
        page_size: Number of items per page, defaults to 10.
        search_keyword: Search keyword, defaults to an empty string.

    Returns:
        The parsed JSON payload, or None when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    # API URL
    url = "https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage"

    # Request parameters
    payload = {
        "pageNum": page_num,
        "pageSize": page_size,
        "businessBookId": "3029115d4afa424f8160adb04bd10e6a",
        "businessEditionId": "19F93E4A7C9B4B589EB001FBFEE6230A",
        "excellentFlag": "",
        "nodeId": None,
        "nodeType": 1,
        "stageCode": "3",
        "subjectCode": "314",
        "sortType": 2,
        "source": "",
        "searchKeyword": search_keyword,
    }

    try:
        response = requests.post(url, json=payload, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f"请求失败,状态码: {response.status_code}")
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"请求异常: {str(e)}")
        return None


class MultiThreadDownloader:
    """Download a URL to a local file using several HTTP range requests.

    The file is split into ``num_threads`` byte ranges, each fetched by its
    own thread into ``<file_path>.part<N>``; the parts are then merged.
    When the size is unknown or too small to split, a single streamed
    download is used instead.
    """

    def __init__(self, url, file_path, num_threads=4):
        self.url = url
        self.file_path = file_path
        self.num_threads = num_threads
        self.total_size = 0
        self.progress = None
        # Failures raised inside worker threads; inspected by start().
        self._errors = []

    def get_file_size(self):
        """Probe the file size with a HEAD request.

        Returns:
            True when a usable Content-Length was found (stored in
            ``self.total_size``), False otherwise.
        """
        try:
            response = requests.head(
                self.url, allow_redirects=True, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException:
            return False
        # The Content-Length of an error page says nothing about the file.
        if not response.ok or 'content-length' not in response.headers:
            return False
        try:
            self.total_size = int(response.headers['content-length'])
        except ValueError:
            return False
        return True

    def download_chunk(self, start, end, thread_id):
        """Download bytes ``start``..``end`` (inclusive) into a part file.

        Raises:
            requests.RequestException: On network or HTTP errors.
            RuntimeError: When the server ignores the Range header or the
                part is shorter or longer than requested.
        """
        headers = {'Range': f'bytes={start}-{end}'}
        expected = end - start + 1
        written = 0
        with requests.get(
            self.url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            response.raise_for_status()
            # A 200 here means the whole file is being sent; merging N full
            # copies would silently produce a corrupt result.
            if response.status_code != 206:
                raise RuntimeError(
                    f"server ignored Range request (status {response.status_code})"
                )
            with open(f'{self.file_path}.part{thread_id}', 'wb') as f:
                for data in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    f.write(data)
                    written += len(data)
                    if self.progress:
                        self.progress.update(len(data))
        if written != expected:
            raise RuntimeError(
                f"part {thread_id}: expected {expected} bytes, got {written}"
            )

    def merge_chunks(self):
        """Concatenate the part files, in order, into the target file.

        The data is assembled in ``<file_path>.tmp`` and renamed at the end
        so an interrupted merge never leaves a truncated file under the
        final name. Each part is deleted once it has been copied.
        """
        tmp_path = f'{self.file_path}.tmp'
        with open(tmp_path, 'wb') as f:
            for i in range(self.num_threads):
                part_file = f'{self.file_path}.part{i}'
                if not os.path.exists(part_file):
                    continue
                with open(part_file, 'rb') as pf:
                    # Stream the copy instead of loading the part in memory.
                    shutil.copyfileobj(pf, f)
                # Delete only after the handle is closed (required on Windows).
                os.remove(part_file)
        os.replace(tmp_path, self.file_path)

    def start(self):
        """Run the download.

        Returns:
            True when the file was fully downloaded, False otherwise.
        """
        # Make sure the target directory exists; dirname is '' for a bare
        # file name, which os.makedirs rejects.
        directory = os.path.dirname(self.file_path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        if not self.get_file_size():
            print(f"无法获取文件大小,使用单线程下载: {self.url}")
            # Fall back to a single-threaded download.
            return self._download_single_thread()

        # Empty or tiny files cannot be split into valid byte ranges.
        if self.num_threads < 2 or self.total_size < self.num_threads:
            return self._download_single_thread()

        # Multi-threaded download
        chunk_size = self.total_size // self.num_threads
        self._errors = []
        self.progress = tqdm(
            total=self.total_size,
            unit='B',
            unit_scale=True,
            desc=os.path.basename(self.file_path),
        )

        threads = []
        for i in range(self.num_threads):
            start = i * chunk_size
            # The last thread also takes the remainder of the division.
            if i == self.num_threads - 1:
                end = self.total_size - 1
            else:
                end = (i + 1) * chunk_size - 1
            thread = threading.Thread(
                target=self._download_chunk_safely, args=(start, end, i)
            )
            threads.append(thread)
            thread.start()

        # Wait for all threads to finish.
        for thread in threads:
            thread.join()
        self.progress.close()

        if self._errors:
            print(f"多线程下载失败: {self._errors[0]}")
            self._remove_part_files()
            return False

        try:
            self.merge_chunks()
        except OSError as e:
            print(f"多线程下载失败: {str(e)}")
            self._remove_part_files()
            self._remove_quietly(f'{self.file_path}.tmp')
            return False
        return True

    def _download_chunk_safely(self, start, end, thread_id):
        """Thread target: run download_chunk and record any failure."""
        try:
            self.download_chunk(start, end, thread_id)
        except Exception as e:
            # Thread boundary: an exception raised here would otherwise be
            # lost, so keep it for start() to report.
            self._errors.append(e)

    def _download_single_thread(self):
        """Stream the whole file in one request; return True on success."""
        tmp_path = f'{self.file_path}.tmp'
        try:
            with requests.get(
                self.url, stream=True, timeout=REQUEST_TIMEOUT
            ) as response:
                response.raise_for_status()
                total_size = int(response.headers.get('content-length', 0))
                with open(tmp_path, 'wb') as f, tqdm(
                    total=total_size,
                    unit='B',
                    unit_scale=True,
                    desc=os.path.basename(self.file_path),
                ) as pbar:
                    for data in response.iter_content(
                        chunk_size=DOWNLOAD_CHUNK_SIZE
                    ):
                        f.write(data)
                        pbar.update(len(data))
            # Publish under the final name only once complete, so a partial
            # file is never mistaken for a finished download on the next run.
            os.replace(tmp_path, self.file_path)
            return True
        except (requests.RequestException, OSError, ValueError) as e:
            print(f"单线程下载失败: {str(e)}")
            self._remove_quietly(tmp_path)
            return False

    def _remove_part_files(self):
        """Delete any leftover part files of this download."""
        for i in range(self.num_threads):
            self._remove_quietly(f'{self.file_path}.part{i}')

    @staticmethod
    def _remove_quietly(path):
        """Best-effort delete; a missing or locked file is not an error."""
        try:
            os.remove(path)
        except OSError:
            pass


def _nested_get(mapping, *keys):
    """Follow ``keys`` through nested dicts; return None if any step is missing."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _fetch_all_lessons(page_size=10):
    """Fetch every lesson row from the API; return None if the first page fails."""
    # Ask for a single row just to learn the total record count.
    first_page = fetch_lesson_data(page_num=1, page_size=1)
    total = _nested_get(first_page, 'data', 'total')
    if total is None:
        return None

    total_pages = (total + page_size - 1) // page_size
    print(f"获取成功,共{total}条记录")

    all_lessons = []
    for page_num in range(1, total_pages + 1):
        print(f"正在获取第{page_num}/{total_pages}页数据...")
        result = fetch_lesson_data(page_num=page_num, page_size=page_size)
        rows = _nested_get(result, 'data', 'rows')
        if rows:
            all_lessons.extend(rows)
        else:
            print(f"第{page_num}页数据获取失败")

    print(f"成功获取{len(all_lessons)}条课程数据")
    return all_lessons


def _extract_video(lesson):
    """Return ``(video_url, file_name)`` for a lesson, or ``(None, None)``.

    The first resource is assumed to be the video.
    """
    resources = lesson.get('lessonResources') or []
    if not resources:
        return None, None
    file_url = resources[0].get('fileUrl')
    if not file_url:
        return None, None
    file_name = file_url.split('/')[-1]
    if not file_name:
        # A URL ending in '/' has no usable file name.
        return None, None
    return VIDEO_BASE_URL + file_url, file_name


def _process_lesson(lesson, down_dir):
    """Download one lesson's video and return its download-info record."""
    record = {
        "course_name": lesson.get('lessonName'),
        "course_id": lesson.get('lessonId'),
        "file_path": "",
        "status": "无视频URL",
    }

    video_url, file_name = _extract_video(lesson)
    if not video_url:
        print("未找到视频URL")
        return record

    file_path = os.path.join(down_dir, file_name)
    record["file_path"] = file_path

    # Skip files that are already on disk.
    if os.path.exists(file_path):
        print(f"文件已存在,跳过下载: {file_path}")
        record["status"] = "已存在"
        return record

    print(f"开始下载视频: {video_url}")
    if MultiThreadDownloader(video_url, file_path).start():
        print(f"视频下载成功: {file_path}")
        record["status"] = "成功"
    else:
        print(f"视频下载失败: {video_url}")
        record["status"] = "失败"
    return record


def main():
    """Fetch the lesson list and download every lesson video into ./Down."""
    # Create the download directory next to this script.
    down_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Down")
    os.makedirs(down_dir, exist_ok=True)

    all_lessons = _fetch_all_lessons(page_size=10)
    if all_lessons is None:
        print("获取课程数据失败")
        return

    download_info = []
    success_count = 0
    fail_count = 0
    info_file = os.path.join(down_dir, "download_info.json")

    for idx, lesson in enumerate(all_lessons, 1):
        print(f"\n处理课程 {idx}/{len(all_lessons)}: {lesson.get('lessonName')}")

        record = _process_lesson(lesson, down_dir)
        download_info.append(record)
        if record["status"] == "成功":
            success_count += 1
        elif record["status"] in ("失败", "无视频URL"):
            fail_count += 1

        # Save the download info every 5 lessons and after the last one.
        # This runs for skipped lessons too, so the final state is always
        # written.
        if idx % 5 == 0 or idx == len(all_lessons):
            with open(info_file, 'w', encoding='utf-8') as f:
                json.dump(download_info, f, ensure_ascii=False, indent=2)
            print(f"已保存下载信息到: {info_file}")

    print(f"\n下载完成!成功: {success_count}, 失败: {fail_count}")


if __name__ == "__main__":
    main()