# Base address of the crawled site; the crawler builds request addresses by
# concatenating a link path onto this string.
URL = "http://www.dytt8.net"

# HTTP headers passed to every requests.get() call in this file: a Referer
# pointing at the site itself and a desktop Chrome User-Agent string.
headers = {
    'Referer': 'http://www.dytt8.net/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
}
def getftp(link):
    """Fetch one page and record every FTP link found on it.

    Requests ``URL + link``, collects each double-quoted string that starts
    with ``ftp`` from the response text, and reads the name enclosed in
    《...》 inside the ``<title>`` element. For every FTP link one formatted
    row is appended to the module-level ``List`` and the module-level
    counter ``num`` is incremented.

    The function is best-effort: any failure (request error, page without a
    matching title, ...) prints ``error getftp`` and returns without raising.

    Args:
        link: Path that is concatenated verbatim onto ``URL``.
            NOTE(review): plain concatenation presumably expects a
            site-relative path starting with "/" -- confirm against callers.
    """
    global num
    try:
        r = requests.get(URL + link, headers=headers)
        # Decode with the encoding detected from the body rather than the
        # one declared in the response headers.
        r.encoding = r.apparent_encoding
        web = r.text

        movies = re.findall(r'"(ftp[^\'"]+)"', web)
        title = re.search(r'<title>.+《(.+)》.+<\/title>', web)
        if title is None:
            # No 《name》 in the title: report it like any other failure
            # instead of relying on an AttributeError from None.group().
            print("error getftp")
            return
        name = title.group(1)

        # Format string: name centred in 10 columns, a tab, then the link
        # centred in 90 columns. The fill character is passed as argument 2;
        # chr(12288) is U+3000, the full-width (ideographic) space.
        tplt = "{0:{2}^10}\t{1:{2}^90}\n"
        for movie in movies:
            List.append(tplt.format(name, movie, chr(12288)))
            num += 1
            print(num)
            print(movie, name)
    except Exception:
        # Exception (not a bare except) keeps the crawl best-effort while
        # still letting KeyboardInterrupt / SystemExit stop the program.
        print("error getftp")
# Running count of FTP links recorded so far; incremented by getftp().
num = 0


def bfs(url):
    """Crawl the site breadth-first starting from ``url``.

    Each dequeued path is fetched from ``URL + path`` and scanned for
    strings ending in ``.html``. Every link not yet present in the
    module-level set ``LINK`` is handed to ``getftp``, added to ``LINK``
    and queued for a later visit. The crawl returns as soon as the
    module-level counter ``num`` exceeds ``times``, or when the queue runs
    empty.

    Args:
        url: Starting path, concatenated verbatim onto ``URL``.
    """
    pending = queue.Queue()  # FIFO queue of paths still to be visited
    pending.put(url)

    while not pending.empty():
        page = pending.get()
        try:
            r = requests.get(URL + page, headers=headers)
            # Decode with the encoding detected from the body.
            r.encoding = r.apparent_encoding
            links = re.findall(r'[^\'"<>]+\.html', r.text)
        except Exception:
            # A page that cannot be fetched or decoded is skipped. Exception
            # (not a bare except) lets Ctrl-C still abort the crawl.
            continue

        for link in links:
            if link in LINK:
                continue
            getftp(link)
            LINK.add(link)
            pending.put(link)
            # num is only read here, so no `global` declaration is needed.
            if num > times:
                return
# Write the results to files.
# movies.txt: the pre-formatted rows collected in List, written as-is
# (each row already ends with a newline).
with open('movies.txt', 'w', encoding='utf-8') as movies:
    movies.writelines(List)

# urls.txt: one line per entry of LINK, each prefixed with the site base URL.
with open('urls.txt', 'w', encoding='utf-8') as url_file:
    url_file.writelines(URL + link + '\n' for link in LINK)