在Python中使用Selenium獲取QQ空間好友說說

from bs4 import BeautifulSoup

from selenium import webdriver

import time

# #使用Selenium的webdriver例項化一個瀏覽器物件，在這裡使用Phantomjs

# driver = webdriver.PhantomJS(executable_path=r"D:\phantomjs-2.1.1-windows\bin\phantomjs.exe")

# #設定Phantomjs視窗最大化

# driver.maximize_window()

# 登入QQ空間

def _element_exists(driver, element_id):
    """Return True if an element with the given id is on the current page.

    Only "element not found" counts as absence; any other WebDriver failure
    (lost session, crashed browser, ...) propagates to the caller instead of
    being silently treated as "not present".
    """
    # Imported locally so this helper does not depend on extra file-level
    # imports beyond the ones the file already has.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    try:
        driver.find_element(By.ID, element_id)
    except NoSuchElementException:
        return False
    return True


def _format_cookies(cookies):
    """Join cookie dicts into one "name=value;name=value;" string."""
    return "".join("{0}={1};".format(c['name'], c['value']) for c in cookies)


def get_shuoshuo(qq):
    """Open a friend's Qzone "shuoshuo" page in Chrome and print its posts.

    The function logs in when the login box is shown (using the placeholder
    credentials hard-coded below), prints every post it can pair with a
    timestamp, prints the session cookies, and always shuts the browser down.

    Args:
        qq: QQ number of the friend whose page is opened; it is substituted
            into the page URL.

    Returns:
        None. All results are written to stdout.
    """
    from selenium.webdriver.common.by import By

    chromedriver = r"D:\soft\chromedriver_win32\chromedriver.exe"
    # NOTE(review): passing the driver path positionally is the legacy
    # constructor form; newer Selenium releases expect a Service object --
    # confirm against the installed version.
    driver = webdriver.Chrome(chromedriver)
    try:
        # Open the page to scrape.
        driver.get('http://user.qzone.qq.com/{}/311'.format(qq))
        time.sleep(5)

        # The presence of the login DIV means we must log in first.
        if _element_exists(driver, 'login_div'):
            driver.switch_to.frame('login_frame')
            driver.find_element(By.ID, 'switcher_plogin').click()
            user_box = driver.find_element(By.ID, 'u')  # user name box
            user_box.clear()
            user_box.send_keys('QQ號碼')
            password_box = driver.find_element(By.ID, 'p')  # password box
            password_box.clear()
            password_box.send_keys('QQ密碼')
            driver.find_element(By.ID, 'login_button').click()
            time.sleep(3)
            # Leave the login iframe so the lookups below search the
            # top-level document again.
            driver.switch_to.default_content()

        driver.implicitly_wait(3)

        # The owner-info icon is only present when we are allowed to view
        # the friend's space.
        if _element_exists(driver, 'QM_OwnerInfo_Icon'):
            driver.switch_to.frame('app_canvas_frame')
            content = driver.find_elements(By.CSS_SELECTOR, '.content')
            stime = driver.find_elements(
                By.CSS_SELECTOR, '.c_tx.c_tx3.goDetail')
            for con, sti in zip(content, stime):
                data = {
                    'time': sti.text,
                    'shuos': con.text,
                }
                print(data)

            # The page source can also be handed to BeautifulSoup for
            # parsing; the parsed tree is not used further here.
            pages = driver.page_source
            soup = BeautifulSoup(pages, 'lxml')

        # Dump the session cookies as a single "name=value;" string.
        print('Cookies:', _format_cookies(driver.get_cookies()))
        print("==========完成================")
    finally:
        # quit() closes every window and ends the driver process, even when
        # one of the steps above raised.
        driver.quit()

# Run the scraper only when this file is executed as a script; replace the
# placeholder argument with the friend's QQ number.
if __name__ == '__main__':
    get_shuoshuo('好友QQ號碼')

程式碼簡析

1。照例，匯入需要使用的模組：

from bs4 import BeautifulSoup

from selenium import webdriver

import time

2。使用Selenium的webdriver例項化一個瀏覽器物件，在這裡使用Phantomjs：

driver = webdriver.PhantomJS(executable_path="D:\\phantomjs.exe")

3。設定Phantomjs視窗最大化：

driver.maximize_window()

4。主函式部分

使用get（）方法開啟待抓取的URL：

driver.get('http://user.qzone.qq.com/{}/311'.format(qq))

等待5秒後，判斷頁面是否需要登入，透過查詢頁面是否有相應的DIV的id來判斷：

try:
    driver.find_element_by_id('login_div')
    a = True
except:
    a = False

如果頁面存在登入的DIV，則模擬登入：

driver.switch_to.frame('login_frame')  # 切換到登入iframe
driver.find_element_by_id('switcher_plogin').click()
driver.find_element_by_id('u').clear()  # 選擇使用者名稱框
driver.find_element_by_id('u').send_keys('QQ號')
driver.find_element_by_id('p').clear()  # 選擇密碼框
driver.find_element_by_id('p').send_keys('QQ密碼')
driver.find_element_by_id('login_button').click()  # 點選登入按鈕
time.sleep(3)

接著，判斷好友空間是否設定了許可權，透過判斷是否存在元素ID：QM_OwnerInfo_Icon

try:
    driver.find_element_by_id('QM_OwnerInfo_Icon')
    b = True
except:
    b = False

如果有許可權能夠訪問到說說頁面，那麼定位元素和資料，並解析：

if b == True:
    driver.switch_to.frame('app_canvas_frame')
    content = driver.find_elements_by_css_selector('.content')
    stime = driver.find_elements_by_css_selector('.c_tx.c_tx3.goDetail')
    for con, sti in zip(content, stime):
        data = {
            # 'qq': qq,
            'time': sti.text,
            'shuos': con.text
        }
        print(data)

除了在Selenium中解析資料，我們還可以將當前頁面儲存為原始碼，再使用BeautifulSoup來解析：

pages = driver.page_source
soup = BeautifulSoup(pages, 'lxml')

最後，我們嘗試一下獲取Cookie，使用get_cookies（）：

cookie = driver.get_cookies()
cookie_dict = []
for c in cookie:
    ck = "{0}={1};".format(c['name'], c['value'])
    cookie_dict.append(ck)
i = ''
for c in cookie_dict:
    i += c
print('Cookies:', i)

另外，再介紹兩個Selenium的常用方法：

- 儲存螢幕截圖：

driver.save_screenshot('儲存的檔案路徑及檔名')

- 執行JS指令碼：

driver.execute_script("JS程式碼")

在Python中使用Selenium獲取QQ空間好友說說

相關文章