【原创】Insight论坛爬虫更新 V1.3(重构代码,新增自动签到功能)(面面出品,必属精品)
把上次写的爬虫认真重构了代码,最新版本为V1.3。 新版改动:
- 新增自动签到功能
- 新增错误抛出
- 修复网络问题、超过回复次数限制等造成的程序崩溃
- 重构代码,修复一些BUG
老规矩,先上图,后面贴出完整代码。 声明:本人所写爬虫代码仅做学习、交流用,擅自挪作其他用途本人概不负责。
完整代码(里面账户、书籍路径等信息,请自行修改):
# WebRobot version 1.3
# Time: 03/02/2021
# Author: mianmian
# Email: weepingrabbitmianmian@gmail.com
# Update explanation:
# 1. Add auto check-in
# 2. Add Error-pop
# 3. Refactored functions
# 4. Repair some bugs
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time
# Account management
def account_list():
    """Return the login credentials the robot cycles through.

    Returns:
        A newly built list of ``[username, password]`` pairs, one per
        account. The entries shipped here are placeholder strings and
        have to be replaced with real credentials before running.
    """
    # The local is deliberately NOT called ``account_list``: reusing the
    # function's own name for its result makes the code harder to read.
    accounts = [
        ['你的账号', '密码'],
        ['你的账号', '密码'],
        ['你的账号', '密码'],
        ['你的账号', '密码'],
    ]
    return accounts
# Shut down the computer
def shutdown_computer(time):
    """Run the system ``shutdown`` command with the given delay.

    Args:
        time: Value substituted after ``-t`` in the command line (the
            caller in this file passes a number of seconds). Note that
            inside this function the name hides the ``time`` module.
    """
    # NOTE(review): the ``-s -f -t`` flags look like the Windows
    # ``shutdown`` syntax -- confirm before using on another OS.
    command = f'shutdown -s -f -t {time}'
    os.system(command)
# Delete a book that has been read
def remove_book_file(book_number_path):
    """Delete the book file at ``book_number_path`` from disk.

    Args:
        book_number_path: Path of the text file to delete.

    Raises:
        OSError: Propagated unchanged if the file cannot be removed
            (for example, it does not exist).
    """
    # os.unlink is the same operation as os.remove under another name.
    os.unlink(book_number_path)
# Automatic check-in
def check_in(CHECKIN_URL, driver):
    """Open the check-in page and click the check-in button if possible.

    Args:
        CHECKIN_URL: Address of the check-in page.
        driver: Browser driver object; only ``get`` and
            ``find_element_by_id`` are used here.

    Returns:
        None. When the button cannot be found or clicked, the message
        '今日已签到!' is printed instead of raising.
    """
    driver.get(CHECKIN_URL)  # go to the check-in page
    try:
        driver.find_element_by_id('checkin').click()
    except Exception:
        # Any ordinary failure to locate/click the button is treated as
        # "already checked in today". ``Exception`` (not a bare except)
        # so that Ctrl+C / SystemExit still stop the program.
        print('今日已签到!')
# Automatic reply
def auto_reply(BASIC_URL, book_number_path, reply_number, driver):
    """Post lines of a text file as replies in the '永久灌水' thread.

    Opens ``BASIC_URL``, follows the link whose text contains '永久灌水',
    then submits every line of the file that is longer than 15
    characters as one reply, pausing 3 seconds after each.

    Args:
        BASIC_URL: Address of the forum home page.
        book_number_path: Path of the UTF-8 text file whose lines are
            posted.
        reply_number: Maximum number of replies to post. Zero or a
            negative value posts nothing.
        driver: Selenium WebDriver used to drive the browser.

    Raises:
        OSError: If the text file cannot be opened.
        Exception: Whatever the driver raises while navigating to the
            thread (before the posting loop starts).
    """
    driver.get(BASIC_URL)
    # Enter the thread and send the END key to the page.
    # NOTE(review): presumably this scrolls down to the reply form at
    # the bottom of the page -- confirm against the site.
    driver.find_element_by_partial_link_text('永久灌水').click()
    driver.find_element_by_tag_name('html').send_keys(Keys.END)

    with open(book_number_path, encoding='utf-8', mode='r') as file:
        reply_count = 0
        for line in file:
            # Check the cap before posting so a non-positive
            # reply_number never produces a reply.
            if reply_count >= reply_number:
                break
            # Only lines longer than 15 characters are posted. The
            # length includes the trailing newline read from the file.
            if len(line) <= 15:
                continue
            reply_count += 1
            try:
                # Fill the CodeMirror editor through its JavaScript API
                # instead of typing into it.
                css_panel = driver.find_element_by_id("reply_form")
                code_mirror_element = css_panel.find_element_by_css_selector(".CodeMirror")
                driver.execute_script(
                    "arguments[0].CodeMirror.setValue(arguments[1]);",
                    code_mirror_element,
                    line,
                )
                # Click the submit button (located by an absolute XPath,
                # so it depends on the exact page layout).
                driver.find_element_by_xpath(
                    "//body/div[@id='main']/div[@id='content']/div[3]/div[2]/form[1]/div[1]/div[1]/div[3]/input[1]"
                ).click()
                print('已经成功回复: {} 次'.format(reply_count))
                time.sleep(3)
            except Exception as error:
                # A failure here is reported as the site's reply limit
                # being reached, and posting stops. The real error is
                # printed as well so other causes are not hidden.
                # ``Exception`` rather than a bare except lets Ctrl+C
                # interrupt the run.
                print('每次最多回复200次')
                print(error)
                break
# Web robot main routine
def webrobot_main(BASIC_URL, CHECKIN_URL, username, userpassword, bookname, driver_path, reply_number, max_retries=3):
    """Log in with one account, check in, post replies, then retire the book.

    Args:
        BASIC_URL: Address of the forum home page.
        CHECKIN_URL: Address of the check-in page.
        username: Account name typed into the login form.
        userpassword: Password typed into the login form.
        bookname: Integer number of the book to read; the file used is
            ``book_path + '<bookname>.txt'``.
        driver_path: Path of the chromedriver executable.
        reply_number: Maximum number of replies to post.
        max_retries: How many times a failed session is retried with a
            fresh browser before the error is re-raised.

    Returns:
        ``bookname + 1``, i.e. the number of the next unread book.

    Raises:
        Exception: The last error, once ``max_retries`` retries have
            also failed.

    Note:
        ``book_path`` is read as a module-level global; it is assigned
        in the ``__main__`` section of this file.
    """
    book_number_path = book_path + '{}.txt'.format(bookname)  # path of this book's file

    failures = 0
    while True:
        driver = None
        try:
            # NOTE(review): the positional driver path and the
            # find_element_by_* helpers are the Selenium 3 style API --
            # confirm the installed Selenium version still provides them.
            driver = webdriver.Chrome(driver_path)  # start the Chrome driver
            # Log in to the site
            driver.get(BASIC_URL)
            driver.find_element_by_link_text('登录').click()
            driver.find_element_by_name('name').send_keys(username)
            driver.find_element_by_name('pass').send_keys(userpassword)
            driver.find_element_by_class_name('span-primary').click()
            check_in(CHECKIN_URL, driver)  # check in
            auto_reply(BASIC_URL, book_number_path, reply_number, driver)  # post replies
            break
        except Exception as error:
            print(error)
            failures += 1
            # Retry in a loop with a cap instead of unbounded recursion,
            # so a permanent failure (e.g. a missing book file) cannot
            # spawn browsers until the interpreter's recursion limit.
            if failures > max_retries:
                raise
        finally:
            # Always end the browser session, on success and on failure.
            # quit() also stops the driver process, which close() (one
            # window only) does not.
            if driver is not None:
                driver.quit()

    remove_book_file(book_number_path)  # delete the book that was just read
    bookname += 1  # advance to the next book number
    return bookname
if __name__ == '__main__':
    # Script entry point: run the robot once per configured account,
    # persist the next book number, then shut the computer down.

    # URLs
    BASIC_URL = 'https://insightcj.com'
    CHECKIN_URL = 'https://insightcj.com/checkin'
    # Paths and files
    # Raw string: '\c' is not a valid escape sequence, so the plain
    # literal only worked by accident (and warns on newer Pythons).
    driver_path = r'C:\chromedriver\chromedriver.exe'
    # Must remain a module-level name: webrobot_main reads it as a global.
    book_path = 'D:/Books/'
    file_name = 'number.txt'
    reply_number = 195
    counter_path = book_path + file_name

    # Book number and accounts
    with open(counter_path, 'r') as counter_file:
        bookname = int(counter_file.read())  # number of the next book to read
    # Kept under a different name so the account_list function is not
    # overwritten by its own result.
    accounts = account_list()

    # Log in with each account in turn
    for username, userpassword in accounts:
        print('\n账号:{}'.format(username))
        # Run the robot; it returns the next unread book number
        bookname = webrobot_main(BASIC_URL, CHECKIN_URL, username, userpassword, bookname, driver_path, reply_number)
        # Persist progress after every account (the book file has
        # already been deleted at this point) and close the file so the
        # value is on disk before the forced shutdown.
        with open(counter_path, 'w') as counter_file:
            counter_file.write(str(bookname))

    print('\n任务已完成!!!')
    shutdown_computer(20)  # shut the computer down after 20 seconds
10 回复
沙发自己坐
申精
哇,不错,回家时测试下。谢谢楼主了, 个人觉得还是很方便的
虽然看的不是很懂,但是我感觉很牛!!
请教下除了自动签到,这个还有啥子用啊?没懂
@aaaaa 灌水区自动回复。
@aaaaa 看楼主的第一个版本,你就知道了。https://insightcj.com/topic/6036ff0194eb0b496f205393
源码来个GitHUB链接呀。 兄弟, 我闲着没事可以去试试看
做的很好 给个赞
@littletim 谢谢