收到的QQ坦白说通常会提供两条信息,比如‘一个天蝎座的男生’,‘一个认识5年的女生’,那如何找出符合以上条件的好友呢?如果你有几百甚至几千个好友,一个一个找不现实。自动寻找方法当然是通过网络爬虫获取信息再分析过滤实现。
思路:
1. 登录自己QQ空间,获取所有好友QQ号
2. 通过好友QQ号,爬好友空间,获得好友的星座、性别、年龄等个人信息
3. 通过自己QQ号和好友QQ号,获取认识天数,因为认识天数是由你和好友的两个QQ号共同决定的
4. 如果遇到好友空间不让你访问,这个不影响以上信息,因为就算你不能访问好友空间,也可以看见好友的个人信息
5. 如果好友没有填写信息,你当然获取不到相关信息
编程语言:Python
用到的库:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import re
import datetime
用到的知识:
1. 通过网址访问云端获取好友信息
2. 用selenium和浏览器配合抓取网页
3. 用正则表达式等处理网页数据,获得有效信息


注意:频繁抓取网页会被腾讯当成违规操作,造成封号两个小时。我的QQ已经被封号两次了,现在还在封号中。每获取5个好友信息就退出登录,等两分钟后再重新登录,也许(只是也许)可以解决这个问题。

代码如下(改进登录时间):
下载代码也可以在链接:https://download.csdn.net/download/gengli2017/10619273
<https://download.csdn.net/download/gengli2017/10619273>
对于初学者,想知道一些代码意思,尽快写一个理解说明。
初学时也找不到资料。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import re
import datetime


def frankSpeak(account, password, batchSize=5, pauseSeconds=120):
    """Collect profile data for every Qzone friend and print the results.

    Logs in with the given credentials, fetches the friend list, then for
    each friend fetches the profile fields and the "known since" label.
    Afterwards it prints the remarks of all friends whose constellation is
    '双鱼座' and whose gender field is '0', followed by every record.

    Args:
        account: QQ number used to log in; also used as the owner's QQ.
        password: Password for that account.
        batchSize: Number of friends processed between browser restarts.
        pauseSeconds: Seconds to wait between closing the browser and
            logging in again.

    Returns:
        A list of tuples
        (qq, remark, age, gender, constellation, knownTime, province, city).
        Gender is '1' for male, '0' for female and '2' when unspecified.
    """
    myQQ = account
    friendInfoVector = []
    driver = loginQQ(account, password)
    try:
        gtk, g_qzonetoken = getGtk_Token(driver)
        friendsDict = getFriends(driver, myQQ, gtk, g_qzonetoken)
        total = len(friendsDict)

        for count, (friendQQ, remark) in enumerate(friendsDict.items(), start=1):
            age, city, constellation, gender, province = getFriendInfo(
                driver, friendQQ, gtk, g_qzonetoken)
            knownTime = getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken)
            friendInfoVector.append(
                (friendQQ, remark, age, gender, constellation, knownTime,
                 province, city))

            # To reduce the chance of the account being locked for scraping
            # too fast, close the session after each batch, wait, and log in
            # again. Skipped after the last friend: nothing is left to fetch.
            if count % batchSize == 0 and count < total:
                driver.quit()
                driver = None
                time.sleep(pauseSeconds)
                driver = loginQQ(account, password)
                gtk, g_qzonetoken = getGtk_Token(driver)
    finally:
        # Always release the browser, including when a request fails midway.
        if driver is not None:
            driver.quit()

    print('一个双鱼座的女生')
    for record in friendInfoVector:
        # Compare this record's own gender field (index 3), not whatever the
        # collection loop happened to leave behind in a local variable.
        if record[4] == '双鱼座' and record[3] == '0':
            print(record[1])

    print('所有好友信息')
    print('(QQ号, 备注, 年龄, 性别, 星座, 认识时间, 省份, 城市)')
    print('女:性别=0 男:性别=1 未注明:性别=2')
    for record in friendInfoVector:
        print(record)

    return friendInfoVector


# Log in to Qzone and return the browser session.
def loginQQ(account, password):
    """Open Chrome, log in to Qzone with account/password, return the driver.

    chromedriver is located by Selenium itself (no explicit path is passed),
    so it has to be discoverable, e.g. on PATH.

    Args:
        account: QQ number typed into the login form.
        password: Password typed into the login form.

    Returns:
        The selenium Chrome driver after the login button has been clicked
        and a short fixed wait has elapsed.
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-infobars")
    # The options object must be handed to the driver to have any effect.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get('https://qzone.qq.com/')
        # The login form lives inside an iframe.
        driver.switch_to.frame('login_frame')
        # Switch from QR-code login to account/password login.
        driver.find_element(By.ID, 'switcher_plogin').click()

        user_box = driver.find_element(By.ID, 'u')
        user_box.clear()
        user_box.send_keys(account)

        password_box = driver.find_element(By.ID, 'p')
        password_box.clear()
        password_box.send_keys(password)

        driver.find_element(By.ID, 'login_button').click()
        # Give the post-login page a moment to load before it is read.
        time.sleep(2)
    except Exception:
        # Do not leave an orphaned browser behind when the login flow fails.
        driver.quit()
        raise
    return driver
# Derive the g_tk value from the login cookies.
def getGTKFromCookie(cookie):
    """Compute the g_tk request token from the 'p_skey' cookie value.

    Starts from 5381 and, for each character, adds (hash << 5) plus the
    character's code point; the result is masked to 31 bits.

    Args:
        cookie: Mapping of cookie names to values; must contain 'p_skey'.

    Returns:
        The token as a non-negative int no larger than 0x7fffffff.

    Raises:
        KeyError: If 'p_skey' is not present in the mapping.
    """
    hashes = 5381
    for letter in cookie['p_skey']:
        hashes += (hashes << 5) + ord(letter)
    return hashes & 0x7fffffff
# Pattern for the inline script that defines window.g_qzonetoken; group 1 is
# the returned expression (the token, including its surrounding quotes).
_QZONETOKEN_PATTERN = re.compile(
    r'window\.g_qzonetoken = \(function\(\)\{ try\{return (.*?);\} catch\(e\)')

# One friend entry in the friend-list response: "<qq>":{\n"realname":"<name>"}
_FRIEND_ENTRY_PATTERN = re.compile(
    r'"(\d+)":\{\n"realname":"(.*?)"}', re.S)


# Obtain gtk and g_qzonetoken from the logged-in session.
def getGtk_Token(driver):
    """Return (gtk, g_qzonetoken) for the current browser session.

    gtk is computed from the session cookies via getGTKFromCookie;
    g_qzonetoken is extracted from the current page's source.

    Args:
        driver: Logged-in selenium driver showing a Qzone page.

    Returns:
        Tuple (gtk, g_qzonetoken).

    Raises:
        ValueError: If the page source does not contain the token script.
    """
    # Flatten selenium's list of cookie dicts into a name -> value mapping.
    cookie = {elem['name']: elem['value'] for elem in driver.get_cookies()}
    gtk = getGTKFromCookie(cookie)

    match = _QZONETOKEN_PATTERN.search(driver.page_source)
    if match is None:
        raise ValueError(
            'g_qzonetoken not found in page source; login may have failed')
    # The captured expression is a quoted string literal; drop the quotes.
    g_qzonetoken = match.group(1).strip('"')
    return gtk, g_qzonetoken


# Fetch the friend list.
def getFriends(driver, myQQ, gtk, g_qzonetoken):
    """Return a dict mapping each friend's QQ number to their remark name.

    Args:
        driver: Logged-in selenium driver (only get() and page_source are
            used).
        myQQ: The logged-in account's QQ number.
        gtk: Token from getGtk_Token.
        g_qzonetoken: Token from getGtk_Token.

    Returns:
        dict of {qq_number_str: realname_str}; empty when the response
        contains no recognisable friend entries.
    """
    # NOTE: g_tk is appended twice, exactly as the original request did.
    friendUrl = (
        'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_hat_get.cgi?hat_seed=1&uin='
        + str(myQQ) + '&fupdate=1&g_tk=' + str(gtk)
        + '&qzonetoken=' + str(g_qzonetoken)
        + '&g_tk=' + str(gtk))
    driver.get(friendUrl)
    response_text = str(driver.page_source)

    return {
        str(qq): str(name)
        for qq, name in _FRIEND_ENTRY_PATTERN.findall(response_text)
    }
# Captures, in order: age, city, constellation, gender, province.
_FRIEND_INFO_PATTERN = re.compile(
    r'"age":(\d*).*"city":"(\w*)".*"constellation":"(\w*).*"gender":(-?\d*).*"province":"(\w*)"'
)
_ADD_FRIEND_TIME_PATTERN = re.compile(r'"addFriendTime":(\d+)')
_SYSTEM_TIME_PATTERN = re.compile(r'"systemTime":(\d+)')


# Fetch a friend's profile fields by QQ number.
def getFriendInfo(driver, friendQQ, gtk, g_qzonetoken):
    """Return (age, city, constellation, gender, province) for one friend.

    All values are strings taken verbatim from the response. Gender is '1'
    for male, '0' for female and '2' otherwise.

    Args:
        driver: Logged-in selenium driver (only get() and page_source are
            used).
        friendQQ: QQ number of the friend to look up.
        gtk: Token from getGtk_Token.
        g_qzonetoken: Token from getGtk_Token.

    Returns:
        The five captured fields, or ('-1', 'NULL', 'NULL', '2', 'NULL')
        when the response does not contain them.
    """
    friendInfoUrl = (
        'https://mobile.qzone.qq.com/profile_get?qzonetoken='
        + str(g_qzonetoken) + '&g_tk=' + str(gtk)
        + '&format=json&hostuin=' + str(friendQQ))
    driver.get(friendInfoUrl)
    response_text = str(driver.page_source)

    match = _FRIEND_INFO_PATTERN.search(response_text)
    if match is None:
        return ('-1', 'NULL', 'NULL', '2', 'NULL')
    return match.groups()


# Work out how long two accounts have been friends.
def getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken):
    """Return a label describing how long myQQ and friendQQ have been friends.

    The label is derived from the calendar dates of the 'addFriendTime' and
    'systemTime' timestamps in the response, counted inclusively:
      * different years  -> '<lastYear - beginYear + 1>年'
      * same year, later month -> '<lastMonth - beginMonth + 1>月'
      * otherwise -> '<lastDay - beginDay + 1>日'

    Args:
        driver: Logged-in selenium driver (only get() and page_source are
            used).
        myQQ: The logged-in account's QQ number.
        friendQQ: The friend's QQ number.
        gtk: Token from getGtk_Token.
        g_qzonetoken: Token from getGtk_Token.

    Returns:
        The label string, or 'NULL' when either timestamp is missing from
        the response.
    """
    # NOTE: g_tk is appended twice, exactly as the original request did.
    knownDaysUrl = (
        'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/friendship/cgi_friendship?activeuin='
        + str(myQQ) + '&passiveuin=' + str(friendQQ)
        + '&situation=1&isCalendar=1&g_tk=' + str(gtk)
        + '&qzonetoken=' + str(g_qzonetoken)
        + '&g_tk=' + str(gtk))
    driver.get(knownDaysUrl)
    response_text = str(driver.page_source)

    begin_match = _ADD_FRIEND_TIME_PATTERN.search(response_text)
    last_match = _SYSTEM_TIME_PATTERN.search(response_text)
    if begin_match is None or last_match is None:
        # Same sentinel getFriendInfo uses for unavailable data, so that one
        # unreadable friend does not abort the whole run.
        return 'NULL'

    begin_date = datetime.date.fromtimestamp(int(begin_match.group(1)))
    last_date = datetime.date.fromtimestamp(int(last_match.group(1)))

    if last_date.year > begin_date.year:
        return str(last_date.year - begin_date.year + 1) + '年'
    if last_date.month > begin_date.month:
        return str(last_date.month - begin_date.month + 1) + '月'
    return str(last_date.day - begin_date.day + 1) + '日'


# Run the program: replace 'QQNumber' with the QQ number to log in with and
# 'password' with its password.
if __name__ == "__main__":
    frankSpeak('QQNumber', 'password')

友情链接
KaDraw流程图
API参考文档
OK工具箱
云服务器优惠
阿里云优惠券
腾讯云优惠券
华为云优惠券
站点信息
问题反馈
邮箱:[email protected]
QQ群:637538335
关注微信