# -*- coding: UTF-8 -*-
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import requests
import lxml
from bs4 import BeautifulSoup
# OAuth scope for the legacy Google Sheets feeds API.
# NOTE(review): Google now recommends "https://www.googleapis.com/auth/spreadsheets";
# confirm this legacy scope still authorizes read/write access.
scopes = ["https://spreadsheets.google.com/feeds"]
# Load the service-account key from credentials.json in the working directory.
credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scopes)
client = gspread.authorize(credentials)
# Open the target spreadsheet by its key and work on the first worksheet.
sheet = client.open_by_key("1q9EJlySLhEqMuJm7bX3NGgys-v_3xqB-nFYwgrNJGM8").sheet1
def getlink(pages=None):
    """Build Yahoo stock-quote URLs for the given stock IDs.

    Parameters
    ----------
    pages : iterable of str or int, optional
        Stock IDs (e.g. ["2415", "2514"]). When omitted, the IDs are read
        interactively from stdin, separated by spaces (original behavior).

    Returns
    -------
    list of str
        One quote-page URL per stock ID, in input order.
    """
    if pages is None:
        # Fall back to the original interactive prompt (text kept as-is).
        pages = input("separate with space without comma").split()
    return ["https://tw.stock.yahoo.com/q/q?s=" + str(page) for page in pages]
# Scrape the quote table for every requested stock and write it to the sheet.
# Fix: the original built the link list with getlink() but then ignored it and
# always fetched the hard-coded stock 2451, writing only a single data row.
sheet.clear()
wrote_header = False
row = 2  # row 1 is reserved for the header
for link in getlink():
    print(link)
    response = requests.get(link)
    soup = BeautifulSoup(response.text, "lxml")
    # The quote data lives in the second <table> on the page.
    table = soup.find_all("table")[1]
    if not wrote_header:
        # Header row: column titles come from the first page fetched.
        ths = table.find_all("th")[0:11]
        titles = ("Date of data",) + tuple(th.getText() for th in ths)
        sheet.insert_row(titles, 1)  # write to row no. 1
        wrote_header = True
    tds = table.find_all("td")[1:12]
    data = ("Date",) + tuple(td.getText() for td in tds)
    sheet.insert_row(data, row)  # one data row per stock, in input order
    row += 1
print(sheet.get_all_values())
Contents of `credentials.json`, placed in the root directory of your project. Keep this file secret — it contains a service-account private key and must never be published or committed to version control:
{
"type": "service_account",
"project_id": "second-abacus-293014",
"private_key_id": "<YOUR-PRIVATE-KEY-ID>",
"private_key": "-----BEGIN PRIVATE KEY-----\n<YOUR-PRIVATE-KEY — NEVER PUBLISH THE REAL VALUE>\n-----END PRIVATE KEY-----\n",
"client_email": "aixyn1khd-a8kxjeyhyynbmj@second-abacus-293014.iam.gserviceaccount.com",
"client_id": "116014604720294077792",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/aixyn1khd-a8kxjeyhyynbmj%40second-abacus-293014.iam.gserviceaccount.com"
}
Runnable Google Colab notebook:
https://colab.research.google.com/drive/1A4-QMnk0I86qbTs0QJJvpH2n4aMdMLyB?usp=sharing
Complete sample script:
# -*- coding: UTF-8 -*-
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import requests
import lxml
from bs4 import BeautifulSoup
# OAuth scope for the legacy Google Sheets feeds API.
scopes = ["https://spreadsheets.google.com/feeds"]
# Authenticate with the service-account key in the working directory.
credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scopes)
client = gspread.authorize(credentials)
# Open the target spreadsheet by key and use its first worksheet.
sheet = client.open_by_key("1q9EJlySLhEqMuJm7bX3NGgys-v_3xqB-nFYwgrNJGM8").sheet1
# Wipe any previous contents before writing fresh data.
sheet.clear()
# Fetch one quote page up front just to extract the column headers.
response = requests.get('https://tw.stock.yahoo.com/q/q?s=2415')
soup = BeautifulSoup(response.text, "lxml")
# The quote data lives in the second <table> on the page.
tables = soup.find_all("table")[1]
ths = tables.find_all("th")[0:11]
# First column title is "資料日期" ("date of data"); the rest come from the table.
titles = ("資料日期",) + tuple(th.getText() for th in ths)
sheet.insert_row(titles, 1)
def getlink(pages=None):
    """Return a Yahoo quote-page URL for each stock ID in *pages*.

    Parameters
    ----------
    pages : iterable of str or int, optional
        Stock IDs. When omitted, space-separated IDs are read from stdin
        (the original interactive behavior).

    Returns
    -------
    list of str
        Quote URLs in input order.
    """
    if pages is None:
        # Original interactive prompt, preserved verbatim.
        pages = input("separate with space without comma!").split()
    return ["https://tw.stock.yahoo.com/q/q?s=" + str(page) for page in pages]
# Fetch each requested quote page and append its values under the header row.
for i in getlink():
    response = requests.get(i)
    soup = BeautifulSoup(response.text, "lxml")
    # The quote data lives in the second <table> on the page.
    tables = soup.find_all("table")[1]
    thx = tables.find_all("td")[1:12]
    # "--" stands in for the date column written by the header row.
    data = ("--",) + tuple(da.getText() for da in thx)
    # NOTE(review): inserting at row 2 on every iteration pushes earlier rows
    # down, so stocks end up in reverse input order — confirm this is intended.
    sheet.insert_row(data, 2)
print(sheet.get_all_values())
Google Colab example (uses Colab's built-in user authentication instead of a service-account key file):
# Authenticate the Colab session with the current user's Google account.
from google.colab import auth
auth.authenticate_user()
import requests
import lxml
from bs4 import BeautifulSoup
import gspread
from oauth2client.client import GoogleCredentials
# Authorize gspread with Colab's application-default credentials
# (no credentials.json file needed in this environment).
gc = gspread.authorize(GoogleCredentials.get_application_default())
sh = gc.create('A new spreadsheet')
# Open our new sheet and add some data.
# NOTE(review): `sh` (the newly created spreadsheet) is never used; the next
# line opens a different spreadsheet by name — confirm which one is intended.
worksheet = gc.open('your spreadsheet name').sheet1
url="https://tw.stock.yahoo.com/q/q?s=2415"
response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")
# The quote data lives in the second <table> on the page.
tables = soup.find_all("table")[1]
# First 11 header cells become the spreadsheet's first row.
a = tables.find_all("th")[0:11]
b=[i.getText() for i in a]
worksheet.insert_row(b, 1)