python3爬虫练习

python3爬虫练习

#通过使用python爬虫抓取的方式使用百度搜索引擎。
#该方法仅仅是用于练习一下抓取方法。
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def search_baidu(query, max_results=20):
    """Search Baidu for *query* in headless Chrome and print the top hits.

    The results page is loaded through Selenium, parsed with BeautifulSoup,
    and every ``div.c-container`` element that has both an ``<h3>`` title and
    a link is printed as a numbered title line followed by its link and a
    blank line.

    Args:
        query: Search text. It is URL-encoded before being placed in the
            ``wd`` query parameter.
        max_results: Maximum number of results to print. Defaults to 20.

    Returns:
        None. Results are written to standard output.
    """
    url = "https://www.baidu.com/s"
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # executable_path is deliberately not passed to webdriver.Chrome: the
    # original author observed that doing so triggers a warning, while
    # passing only `options` does not.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # urlencode escapes spaces, '&', '#', and non-ASCII text so the whole
        # query reaches Baidu as a single `wd` value.
        driver.get(url + "?" + urlencode({"wd": query}))
        page_source = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(page_source, "html.parser")
    results = soup.find_all("div", class_="c-container")

    printed = 0
    for result in results:
        if printed >= max_results:
            break

        heading = result.find("h3")
        anchor = result.find("a", href=True)
        # Some containers have no title or link; skip them instead of
        # crashing on a None lookup.
        if heading is None or anchor is None:
            continue

        printed += 1
        title = heading.text
        link = anchor["href"]
        print(f"{printed}. {title}\n{link}\n")


if __name__ == "__main__":
    # Prompt text is Chinese for "Please enter the search content:".
    query = input("请输入搜索内容:").strip()
    # Do not launch a browser for an empty search.
    if query:
        search_baidu(query)

Comments are closed.