Linux 服务器自动连接校园网，selenium + geckodriver + firefox浏览器

阿里云国内75折回扣微信号：monov8

阿里云国际，腾讯云国际，低至75折。AWS 93折免费开户实名账号代冲值优惠多多微信号：monov8 飞机：@monov6

文章目录

由于校园网有连接时长限制，每次超过时间都需要手动进行登录连接，很麻烦。所以使用 python 写了一个脚本，可以自动检测网络是否连接，并且在断网时自动连接校园网。

设备信息

Linux 服务器 CentOS 7
firefox 浏览器版本Mozilla Firefox 68.5.0esr

建议在安装之前可以在linux 服务器上新建一个项目文件夹以便后续存放文件和日志信息 本人创建的项目文件夹名 auto_connect

一. 下载安装 firefox 浏览器相关驱动

下载之前先要查看服务器上firefox 浏览器的版本并且下载对应版本的selenium 和geckodriver

1. firefox 浏览器版本查看

在服务器终端输入命令

firefox -V

在这里插入图片描述

2. 下载对应版本的 selenium 和 geckodriver 驱动

firefox 浏览器版本与selenium 和 geckodriver 版本对应表
 geckodriver 下载链接

本人选择的selenium 版本是3.141.0 geckodriver 版本是0.29.0
最好在conda 环境下下载需要先激活自己的conda 环境

# 安装selenium命令 在conda 环境下安装
pip install selenium==3.141.0

# 查看安装的selenium版本
import selenium
print(selenium.__version__)

# 1. 下载geckodriver可以先从本地下载然后上传到服务器
wget https://github.com/mozilla/geckodriver/releases/download/v0.29.0/geckodriver-v0.29.0-linux64.tar.gz

# 2. 解压, 解压后将 geckodriver 放在新建的项目文件夹auto_connect下
tar -xvzf geckodriver-v0.29.0-linux64.tar.gz

# 3. 修改权限
chmod +x geckodriver

# 4. 转移将 geckodriver 复制到 /usr/bin  /usr/local/bin 和python.exe 的目录下
# 复制到 /usr/bin
sudo cp geckodriver /usr/bin

# 复制到 /usr/local/bin
sudo cp geckodriver /usr/local/bin

# 复制到与python.exe 同目录下由于本人的环境conda 环境所以复制到conda 环境下与python.exe 同目录下
sudo cp geckodriver /home/user1/anaconda3/envs/自己的环境名/bin/

二. 自动连接脚本

在新建的auto_connect文件夹下新建auto_connect.py 文件
每次重连的打印信息都会被保存进日志文件auto_connect_log.txt
在conda 环境下运行
在编写脚本之前需要准备的信息

1. 校园网登录的网址
2. 校园网登录的账号和密码
3. 校园网登陆网页中填写账号、密码和点击登录按钮的相应组件

完整代码

from selenium import webdriver
from selenium.webdriver import FirefoxOptions
from bs4 import BeautifulSoup
import time
import requests

username_str = "*******"   # your campus-network login username
password_str = "********"   # your campus-network login password
# Result of the most recent connectivity check; handler() rewrites this
# module-level flag on every call and main() reads it.
can_connect = True

def login(page_load_timeout=60):
    """Log in to the campus-network portal through a headless Firefox.

    Opens the portal page, fills in ``username_str`` / ``password_str`` and
    clicks the login button. Each stage appends a line to
    ``auto_connect_log.txt`` in the current working directory.

    Any exception raised while starting the browser or driving the page is
    caught, printed, and recorded in the log file, so a failed attempt never
    terminates the caller's monitoring loop. The browser is always shut down
    if it was started.

    Uses the Selenium 3 API (``find_elements_by_name`` and the
    ``firefox_profile=`` keyword).

    Args:
        page_load_timeout: Maximum number of seconds ``driver.get`` /
            ``driver.refresh`` may take before Selenium raises, so a
            non-responsive portal cannot block the script forever.

    Returns:
        None.
    """

    def write_log(message):
        # Append one timestamped entry to the shared log file.
        with open("auto_connect_log.txt", "a+") as f:
            f.write("\n" + getCurrentTime() + message)

    write_log(" 0. trying to connect Internet")

    # Stays None until the browser has actually started, so the cleanup in
    # ``finally`` knows whether there is anything to quit.
    driver = None
    try:
        profile = webdriver.FirefoxProfile()
        # Do not download images (use as appropriate for the portal page).
        profile.set_preference("permissions.default.image", 2)
        # Disable the browser caches.
        profile.set_preference("network.http.use-cache", False)
        profile.set_preference("browser.cache.memory.enable", False)
        profile.set_preference("browser.cache.disk.enable", False)
        profile.set_preference("browser.sessionhistory.max_total_viewers", 3)
        profile.set_preference("network.dns.disableIPv6", True)
        # Preference names are case-sensitive; the original spelling
        # "Content.notify.interval" did not address the real preference.
        profile.set_preference("content.notify.interval", 750000)
        profile.set_preference("content.notify.backoffcount", 3)
        # Pipelining is supported by some sites and not by others. Only the
        # proxy and max-requests preferences are set here.
        profile.set_preference("network.http.proxy.pipelining", True)
        profile.set_preference("network.http.pipelining.maxrequests", 32)

        # Configure and start the headless browser.
        opts = FirefoxOptions()
        opts.add_argument("--headless")
        driver = webdriver.Firefox(options=opts, firefox_profile=profile)
        driver.set_page_load_timeout(page_load_timeout)
        write_log(" 1. driver init successful")

        driver.get("***********")  # your campus-network login URL
        driver.refresh()
        write_log(" 2. the diver init and address insert successful")

        # Locate the username, password and login-button controls by their
        # ``name`` attribute (look the names up in the portal's page source).
        # Index [1] selects the second element carrying each name.
        username_input = driver.find_elements_by_name("DDDDD")[1]
        password_input = driver.find_elements_by_name("upass")[1]
        login_button = driver.find_elements_by_name("0MKKey")[1]
        write_log(" 3. the account password and loginButton get successful")

        # Submit the credentials.
        username_input.send_keys(username_str)
        password_input.send_keys(password_str)
        login_button.click()
        write_log(" 4. Connect successfully \n")

    except Exception as e:
        print(getCurrentTime(), e)
        # Login failed.
        print(getCurrentTime(), "2.-1 Error Login")
        # Record the failure in the log file too; only the exception type is
        # written so the entry stays plain ASCII.
        write_log(" -1. Error Login (" + type(e).__name__ + ")")
    finally:
        if driver is not None:
            driver.quit()

#获取当前时间
def getCurrentTime():
    """Return the current local time as a ``[YYYY-MM-DD HH:MM:SS]`` string."""
    now = time.localtime()
    return time.strftime('[%Y-%m-%d %H:%M:%S]', now)


#判断当前是否可以连网
def handler():
    """Check whether the Internet is reachable and update ``can_connect``.

    Requests http://www.baidu.com with a 10-second timeout. The connection
    counts as working only when the response status is 200 and the page
    contains an element whose ``value`` attribute is "百度一下"; a 200 response
    without that element is treated as offline.

    Side effects:
        Sets the module-level ``can_connect`` flag to the same value that is
        returned.

    Returns:
        bool: True when the check succeeds, False otherwise (including when
        the request or the parsing raises).
    """
    global can_connect
    can_connect = False  # assume failure until the check fully succeeds
    try:
        # The timeout matters: without it the request can block the script
        # while the network is down.
        baidu_request = requests.get("http://www.baidu.com", timeout=10)
        if baidu_request.status_code != 200:
            print(getCurrentTime(), '1.2 Offline')
            return False

        baidu_request.encoding = 'utf-8'
        baidu_page = BeautifulSoup(baidu_request.text, 'html.parser')
        if baidu_page.find(value="百度一下") is None:
            return False

        can_connect = True
        return True
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the script.
        print(getCurrentTime(), 'error', exc)
        return False

#主函数
def main(check_interval=300, retry_interval=10):
    """Run the monitoring loop forever: check the network, log in when it is down.

    Args:
        check_interval: Seconds to sleep between checks while the network is
            up.
        retry_interval: Seconds to wait after a login attempt before checking
            again. Without this pause an unreachable portal makes the loop
            start one browser after another with no delay.

    Returns:
        Never returns; stop the script with Ctrl-C or a signal.
    """
    print(getCurrentTime(), "Hi, The Script of auto_connect start to run")
    while True:
        # 1. Test connectivity. handler() returns the same value it stores in
        #    the global can_connect flag.
        if handler():
            time.sleep(check_interval)
            continue

        # 2. The network is down: record it and try to log in again.
        with open("auto_connect_log.txt", "a+") as f:
            f.write("\n" + getCurrentTime() + " The Internet disconnect, and I am trying to connect it....")
        login()
        time.sleep(retry_interval)


# Start the loop only when the file is executed as a script, so importing the
# module has no side effects.
if __name__ == "__main__":
    main()

三、问题记录

1. Message: Failed to decode response from marionette

自动连接程序运行了一周都是正常的但是突然出现这个问题
使用top 命令查看CPU占有率
top 命令详解

top -bn 1 -i -c

可以看到有很多firefox 进程进一步查看火狐浏览器进程

ps -aux|grep firefox

删除所有firefox 进程重新运行自动连接脚本

pkill firefox

2. driver.get(url) 超时

自动连接脚本成功运行一周但是突然报错driver.get(url) 超时, 无法自动连接
查看 auto_connect 文件夹占用的磁盘空间，发现竟然占用了 21G

查看文件夹占有大小
du -h --max-depth=1

解决办法

1. 定期手动删除服务器上的auto_connect文件夹 然后从本地重新上传

2. 配置页面加载优化和浏览器缓存
	profile = webdriver.FirefoxProfile()
	# 禁止下载图片根据情况使用
    profile.set_preference("permissions.default.image", 2)  
    # 禁用浏览器缓存
    profile.set_preference("network.http.use-cache", False)
    profile.set_preference("browser.cache.memory.enable", False)
    profile.set_preference("browser.cache.disk.enable", False)
    profile.set_preference("browser.sessionhistory.max_total_viewers", 3)
    profile.set_preference("network.dns.disableIPv6", True)
    profile.set_preference("Content.notify.interval", 750000)
    profile.set_preference("content.notify.backoffcount", 3)
    # 有的网站支持 有的不支持 2 35 profile.set_preference("network.http.pipelining", True)
    profile.set_preference("network.http.proxy.pipelining", True)
    profile.set_preference("network.http.pipelining.maxrequests", 32)

 	driver = webdriver.Firefox(options=opts, firefox_profile=profile)

3. 每次driver.get(url)刷新窗口
	 driver.get("***************") # 你的校园网登陆地址
	 driver.refresh()

3. requests.exceptions.ConnectionError: HTTPConnectionPool(host='www.baidu.com', port=80): Max retries exceeded with url

HTTPConnectionPool(host='www.baidu.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f576363cf10>: Failed to establish a new connection: [Errno -2] Name or service not known'))

handler() 函数中通过 requests.get(url) 与 baidu 建立 TCP 连接，默认为 keep-alive，即连接一次、传输多次；然而在多次访问后，连接不能结束并回到连接池中，导致不能产生新的连接。

阿里云国内75折回扣微信号：monov8

阿里云国际，腾讯云国际，低至75折。AWS 93折免费开户实名账号代冲值优惠多多微信号：monov8 飞机：@monov6