#!/usr/bin/python
import os
from google.colab import drive
from google.colab import files
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
import requests
import lxml
from bs4 import BeautifulSoup
import time
import sqlite3
drive.mount('/content/drive', force_remount=False)
global path
path = "/content/drive/MyDrive/TS/"
os.listdir(path)
response = requests.get("https://tw.stock.yahoo.com/q/q?s=2451")
soup = BeautifulSoup(response.text, "lxml")
tables = soup.find_all("table")[1]
ths = tables.find_all("th")[0:11]
thx = tables.find_all("td")[1:12]
titles = tuple(th.getText().strip() for th in ths)
data = tuple(da.getText().strip() for da in thx)
#上面先爬取data,我懒啊,用返以前既例子做说明(⚈ᴗ⚈),记得MyDrive系无空格既喇,如果唔系下面会建立唔到个db file#以下系先建立一个资料库,配置容器来存放你爬返来既data conn = sqlite3.connect('/content/drive/MyDrive/yahoo.db')
#第一句先系colab整个空白既database摆野
conn.execute('''CREATE TABLE IF NOT EXISTS yahoo_stock(titles TEXT NOT NULL, data TEXT NOT NULL);''')
#第二句开始黎料喇,先系yahoo.db内建立一个用来摆data既容器yahoo_stock,然后我地目标系整返个二维数据, titles同data对应上面爬到既野(可算是database既两个columns),佢地既属性都系文字 TEXT 同埋佢地唔系等于无野NOT NULL,因为要将你爬既野放落去, 如果系数字既话就改TEXT做INTEGER,记得INTEGER属性既只可以放数字唔系文字啊,要不然就error喇
conn.commit()
#类似submit必须要的
print("Database is created successfully!")
#以下系属于写入资料部分c = conn.cursor()
#模拟鼠标点击list=[]
for i in range(len(titles)):
list.append([titles[i], data[i]])
#建立多组二维数据到list内
for i in list:
values = i
c.execute('insert into yahoo_stock(titles, data) values (?, ?)', values) #变数系执行写入数据库指令,最好用?来代替,避免外来强行插入不必要攻击
conn.commit()
#再submit你提交既插入资料
#print(values)
#以下系查询数据库内资料,支持条件过滤观看资料
print("Openning database successfully")
#print("Total number of rows updated :", conn.total_changes)
#cursor = conn.execute("SELECT * from yahoo_stock where data > 69.4")
#cursor = conn.execute("SELECT * from yahoo_stock where titles = '最高'")
cursor = conn.execute("SELECT * from yahoo_stock")
for row in cursor:
#print("title = ", row[0])
print(row[0:2])
print("Operation done successfully")
#以pandas df 选择titles同data两个columns来读取
a=pd.read_sql('SELECT titles, data FROM yahoo_stock', conn)
print('\n\n')
print("以pandas df 读取资料库内yahoo_stock table")
print(a)
conn.close()
#pandas除左可以以column形式写入数据到excel外,仲可以写入到sqlite3 database
import pandas as pd
import sqlite3
conn = sqlite3.connect('/content/drive/MyDrive/yahoo.db')
c = conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS yahoo_stock (titles text, data text)')
conn.commit()
#以字典形式写入两个column - titles 同 data
datapd = {'titles': ['Computer','Tablet','Monitor','Printer'],
'data': [900,300,450,150]
}
df = pd.DataFrame(datapd, columns= ['titles','data'])
#以下以pandas dataframe 写入sql
#用replace会先删除数据库内所有野然后再输入panda df既数据
#df.to_sql('yahoo_stock', conn, if_exists='replace', index = False)
#用append会把数据叠加到数据库后面
df.to_sql('yahoo_stock', conn, if_exists='append', index = False)
#显示刚上面写入的资料库内容
c.execute('''
SELECT * FROM yahoo_stock
''')
for row in c.fetchall():
print (row)
pandas转df的模样:
df = pd.DataFrame({'col name': titles, 'col name1': data}, columns=['col name', 'col name1'])
Read Excel ..... Example:
#!/usr/bin/python
import os
from google.colab import drive
from google.colab import files
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
import requests
import lxml
from bs4 import BeautifulSoup
import time
import sqlite3
import pandas as pd
drive.mount('/content/drive', force_remount=False)
global path
path = "/content/drive/MyDrive/"
df = pd.read_csv('/content/drive/MyDrive/animation.csv', error_bad_lines=False)
#dfname._stat_axis.values.tolist() # 獲取行名 row
# e.g. df.iloc[0].values.tolist() 以第0行輸出為list
#dfname.columns.values.tolist() # 獲取列名 col ['動畫名稱', '聲優', '地方', '年份', '評分']
col_title=df.columns.values.tolist()
#print(df)
conn = sqlite3.connect('/content/drive/MyDrive/animation.db')
conn.execute('CREATE TABLE IF NOT EXISTS animation ({} text, {} text, {} text, {} text, {} text);'.format(col_title[0],col_title[1],col_title[2],col_title[3],col_title[4])) #建立資料表內table同columns
conn.commit() #<---記得要comit,如果唔係上面table就不能建立
print("資料表建立ok")
df.to_sql('animation', conn, if_exists='replace', index=False) #dataframe 直接轉存 SQlite (replace ===>取代及覆蓋所有TABLE內資料 append ===>在已存在的資料後疊加)
conn.commit() #<---記得要comit,如果唔係上面轉存動作會被取消
print("pandas_df已成功轉存為Sqlite資料庫格式!")
#以下為資料庫插入資料
aa="one punch man"
bb="kerek"
cc="HongKong"
dd="2021"
ee="9.5"
sql=conn.cursor() #connect db with mouse
#sql.execute('''INSERT INTO animation(動畫名稱, 聲優, 地方, 年份, 評分) values (?, ?, ?, ?, ?);''', (aa, bb, cc, dd, ee) )
#sql.execute('''INSERT INTO animation(動畫名稱, 聲優, 地方, 年份, 評分) SELECT 'one punch man', 'kerek', 'HongKong', '2021', '10.0' WHERE NOT EXISTS (SELECT * FROM animation WHERE 動畫名稱=? AND 聲優=? AND 地方=? AND 年份=? AND 評分=?);''', (aa, bb, cc, dd, ee) )
sql.execute('''INSERT INTO animation(動畫名稱, 聲優, 地方, 年份, 評分) SELECT ?, ?, ?, ?, ? WHERE NOT EXISTS (SELECT * FROM animation WHERE 動畫名稱=? AND 聲優=? AND 地方=? AND 年份=? AND 評分=?);''', (aa, bb, cc, dd, ee, aa, bb, cc, dd, ee) )
conn.commit()
#以下為filter或查看資料庫內容
word=input("")
sql=conn.cursor()
#sql.execute("SELECT * from animation WHERE 評分 %s"%(word))
sql.execute("SELECT 評分, 動畫名稱, 年份 from animation WHERE 評分 %s"%(word)) #(評分, 動畫名稱, 年份) 呢三樣係你想係輸出度睇到既野
conn.commit()
#sql.execute("SELECT * from animation") # * select all
#sql.execute("SELECT 地方 from animation")
#sql.execute("SELECT 年份 from animation")
#sql.execute("SELECT 評分, 動畫名稱, 年份 from animation")
for row in sql.fetchall():
print(row)
conn.close()
pandas remove duplicate values of column
import pandas as pd
data = [['Google',10],['Runoob',12],['Wiki',13],['Google',10],['Wiki',13]]
df=pd.DataFrame(data,columns=['Site','Age'])
print(df)
full=df.drop_duplicates(subset = 'Site', keep = "first")
print(full)
#print(len(full))獲取行數