# -*- coding: UTF-8 -*-
import requests
import pandas as pd
import lxml
from bs4 import BeautifulSoup
import time
import random
import csv
#import codecs
#import unicodecsv as csv

# Scrape the first page of the ithelp.ithome.com.tw tech-article listing and
# save the article titles, view counts, and author/date info lines to save.csv.
URL = 'https://ithelp.ithome.com.tw/articles?tab=tech'
# Identify as a normal desktop browser so the site serves the full page.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
res = requests.get(URL, headers=headers, timeout=10)
soup = BeautifulSoup(res.text, 'lxml')

# Article titles (one <h3 class="qa-list__title"> per listing row).
comment = [h3.text for h3 in soup.find_all('h3', {"class": "qa-list__title"})]
# View counts: strip the surrounding "Like" counter text from the condition block.
score = [div.text.strip("\n\n0\nLike\n\n0\n").strip()
         for div in soup.find_all('div', {"class": "qa-list__condition"})]
# Author / posting-date info line.
name = [div.text for div in soup.find_all('div', {"class": "qa-list__info"})]

# Write results as CSV via pandas (utf_8_sig adds a BOM so Excel decodes it correctly).
res = pd.DataFrame({'日期': name, '瀏覽': score, '評價': comment}, columns=['日期', '瀏覽', '評價'])
res.to_csv("save.csv", encoding='utf_8_sig', index=False)
# ---- Dead reference code (commented out): Douban movie-comments scraper sketch ----
#URL = ''
#html = requests.get(URL).text
#soup = BeautifulSoup(html,'lxml')
#if requests.get(url).status_code == 200:
#    pass
#else:
#    print("error")
#def get(page):
#    url = 'https://movie.douban.com/subject/6390825/comments?start=' + str(page * 20) + '&limit=20&sort=new_score&status=P'
#    for i in range(1, 21):
#        name.append(response.xpath('//*[@id="comments"]/div[%s]/div[2]/h3/span[2]/a' % (i))[0].text)
#        score.append(response.xpath('//*[@id="comments"]/div[%s]/div[2]/h3/span[2]/span[2]' % (i))[0].attrib['class'][7])
#        comment.append(response.xpath('//*[@id="comments"]/div[%s]/div[2]/p' % (i))[0].text)
# ---- Example: export a scraped table to Google Sheets (Colab) ----
import gspread_dataframe as gd
from google.colab import auth
auth.authenticate_user()
import requests
import lxml
from bs4 import BeautifulSoup
import pandas as pd
import gspread
from oauth2client.client import GoogleCredentials

# Authorize gspread with the Colab user's application-default credentials.
gc = gspread.authorize(GoogleCredentials.get_application_default())
sh = gc.create('A new spreadsheet')
# Open our new sheet and add some data.
worksheet = gc.open('A new spreadsheet').sheet1

# Scrape the Yahoo quote page for stock 2415 and collect the first 11
# header cells of the second <table> (the quote table).
url = "https://tw.stock.yahoo.com/q/q?s=2415"
response = requests.get(url, timeout=10)
soup = BeautifulSoup(response.text, "lxml")
quote_table = soup.find_all("table")[1]
header_cells = quote_table.find_all("th")[0:11]
# Build the header-text list directly (the original used a list comprehension
# only for its .append() side effect, producing a throwaway list of Nones).
c = [cell.getText() for cell in header_cells]

df = pd.DataFrame()
df['年份'] = ['增量', '名义增长率', '排名', '排名变化']
# Or, alternatively, build a richer demo DataFrame:
# Demo DataFrame with mixed/messy values, printed then pushed to the sheet.
df = pd.DataFrame(
    {
        "id": [1001, 1002, 1003, 1004, 1005, 1006],
        "date": pd.date_range('20130102', periods=6),
        "city": ['Beijing ', 'SH', ' guangzhou ', 'Shenzhen', 'shanghai', 'BEIJING '],
        "age": [23, 44, 54, 32, 34, 32],
        "category": ['100-A', '100-B', '110-A', '110-C', '210-A', '130-F'],
        "price": [1200, 2133, 5433, 4432, 12356, 45678],
    },
    columns=['id', 'date', 'city', 'category', 'age', 'price'],
)
print(df)
# Write the DataFrame into the worksheet once (the original called
# set_with_dataframe twice back-to-back, redundantly rewriting the same data).
gd.set_with_dataframe(worksheet, df)