登录 白背景

study/spiderDemo.md

import requests
from lxml import etree

# Create a session so the headers and the underlying connection are reused.
s = requests.Session()
# Request timeout in seconds. A requests Session has no session-wide timeout
# setting: assigning `s.timeout` is silently ignored and the request could
# block forever, so the value must be passed to each request call instead.
REQUEST_TIMEOUT = 3
# Update the session's default headers rather than replacing them, so the
# case-insensitive header mapping and the library defaults are preserved.
s.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
})

# Fetch the page content.
response = s.get('https://m00zik.com/wiki/', timeout=REQUEST_TIMEOUT)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page and
# reporting its links/images as if they belonged to the wiki page.
response.raise_for_status()
htmlData = response.text

# Parse the markup into an element tree that can be queried with XPath.
htmlXpath = etree.HTML(htmlData)

# Print the text of the page's <title> element(s).
print(htmlXpath.xpath("//title/text()"))

# Collect the href attribute of every <a> element and report how many there are.
allLinks = htmlXpath.xpath("//a/@href")
print("页面链接数量:{}".format(len(allLinks)))


# Collect the src attribute of every <img> element, then print the list and its size.
allImgs = htmlXpath.xpath("//img/@src")
print(allImgs)
print("页面图片数量:{}".format(len(allImgs)))
['index.md - 魔力之所wiki']
页面链接数量:465
['https://img1.gamersky.com/upimg/pic/2017/09/05/201709050856385769.jpg', 'https://img1.gamersky.com/upimg/pic/2017/09/05/201709050856385769.jpg']
页面图片数量:2