import numpy as np
import pandas as pd
import requests as req
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
# Fetch sports news and store it in a file
def getNewsHtml(url, timeout=10):
    """Download a news page and return its HTML text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text, or the string "Error" when the request
        fails or the server answers with an HTTP error status.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the crawl
        # indefinitely.
        r = req.get(url, headers={'user-agent': 'Mozilla/5.0'}, timeout=timeout)
        r.raise_for_status()
    except req.RequestException:
        # Failures are reported through the "Error" sentinel rather than an
        # exception; only request-related errors are caught, so programming
        # errors and Ctrl-C still propagate.
        return "Error"
    return r.text
# 爬取新聞信息
def getNewDate(html):
    """Extract the headline, body text and publish time from a news page.

    The headline and the publish time are also printed as they are found.

    Args:
        html: HTML source of the news page.

    Returns:
        A list ``[title, text, publish_time]`` where ``title`` is the text of
        the first ``div.LEFT > h1`` element, ``text`` is the concatenated
        text of the ``div.content-article > p.one-p`` paragraphs after the
        first two, and ``publish_time`` is the ``content`` attribute of the
        first ``<meta name="apub:time">`` tag.

    Raises:
        IndexError: If the page has no matching headline or publish-time tag.
        KeyError: If the publish-time tag has no ``content`` attribute.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Headline
    headline = soup.select("div.LEFT > h1")[0].text
    print(headline)

    # Publish time
    publish_time = soup.find_all("meta", attrs={"name": "apub:time"})[0].attrs["content"]
    print(publish_time)

    # Article body: the first two matched paragraphs are left out of the text
    # (presumably non-article lead-in -- confirm against the page layout).
    paragraphs = soup.select("div.content-article > p.one-p")
    text = "".join(p.text for p in paragraphs[2:])

    return [headline, text, publish_time]
# 循環爬取urls數組中的路徑
def forNewUrl(urls):
    """Fetch and parse every page in ``urls``.

    Args:
        urls: Iterable of news page addresses.

    Returns:
        A list with one ``getNewDate`` result per page that was downloaded
        successfully, in the order the addresses were given.
    """
    news_rows = []
    for url in urls:
        # Download the page source
        html = getNewsHtml(url)
        # getNewsHtml reports a failed download with the string "Error";
        # parsing that as a page would fail on the missing headline, so the
        # address is skipped instead.
        if html == "Error":
            print("Skipping {}: download failed".format(url))
            continue
        news_rows.append(getNewDate(html))
    return news_rows
# Save the news data
def saveNewDate(ListNewsDate, newPath):
    """Write the scraped news rows to an Excel workbook.

    Args:
        ListNewsDate: Rows of three values each, matching the columns
            ``NewTilte``, ``NewContent`` and ``createtime``.
        newPath: Destination path of the workbook.
    """
    # Convert the rows to a DataFrame so they can be stored as a sheet
    df = pd.DataFrame(ListNewsDate, columns=["NewTilte", "NewContent", "createtime"])
    # The context manager saves and closes the workbook on exit, also when
    # to_excel raises; ExcelWriter.save() is no longer available in pandas 2.x.
    with pd.ExcelWriter(newPath) as writer:
        df.to_excel(writer, sheet_name="ListNewsDate1")
# 爬取新聞的頁面ur路徑
# url = "https://new.qq.com/rain/a/SPO2019121602087000"
# News pages to scrape
urls = [
    "https://new.qq.com/rain/a/SPO2019121602087000",
    "https://new.qq.com/omn/20191218/20191218A0NMFX00.html",
    "https://new.qq.com/omn/20191218/20191218A0OTX800.html",
    "https://new.qq.com/omn/20191218/20191218A0JR4H00.html",
    "https://new.qq.com/omn/20191218/20191218A0OO9M00.html",
    "https://new.qq.com/omn/20191218/20191218A0JVAA00.html",
    "https://new.qq.com/omn/20191218/20191218A0HDXZ00.html",
    "https://new.qq.com/omn/20191218/20191218A0F26Y00.html",
    "https://new.qq.com/omn/20191218/20191218A0F1T500.html",
    "https://new.qq.com/omn/20191218/20191218A0ENJ800.html",
    "https://new.qq.com/omn/20191218/20191218A0E85400.html",
    "https://new.qq.com/rain/a/20191218A0CEBN00",
    "https://new.qq.com/omn/20191218/20191218A0CAJB00.html",
    "https://new.qq.com/omn/20191218/20191218A0BPK400.html",
    "https://new.qq.com/omn/20191218/20191218A0BNTG00.html",
    "https://new.qq.com/rain/a/20191218A0BNI300",
    "https://new.qq.com/omn/20191218/20191218A0BM8G00.html",
    "https://new.qq.com/omn/20191218/20191218A0BFS000.html",
    "https://new.qq.com/omn/20191218/20191218A0B3AT00.html",
    "https://new.qq.com/rain/a/20191218A0B0CI00",
    "https://new.qq.com/omn/20191218/20191218A0AUGQ00.html",
    "https://new.qq.com/omn/20191218/20191218A0A42300.html",
    "https://new.qq.com/omn/20191218/20191218A09YES00.html",
    "https://new.qq.com/omn/20191218/20191218A09XPJ00.html",
    "https://new.qq.com/omn/20191218/20191218A09MW500.html",
    "https://new.qq.com/omn/20191218/20191218A09AGO00.html",
    "https://new.qq.com/omn/20191218/20191218A08E6V00.html",
    "https://new.qq.com/omn/20191218/20191218A067ZI00.html",
    "https://new.qq.com/omn/20191218/20191218A046ZD00.html",
    "https://new.qq.com/omn/20191218/20191218A0424P00.html",
]
def run():
    """Scrape every page listed in ``urls`` and save the rows to ``ListNewsDate.xlsx``."""
    news_rows = forNewUrl(urls)
    saveNewDate(news_rows, "ListNewsDate.xlsx")
# Run the scraper only when this file is executed as a script, so importing
# the module does not trigger network requests or file writes.
if __name__ == "__main__":
    run()
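# Copyright notice: all materials on this site were recommended, collected and
# compiled by users, and are intended for study and research exchange only.
# Working hours: 8:00-18:00
# Customer service phone
# Email
# admin@qq.com
# Scan the QR code
# Get the latest updates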