import lxml
from bs4 import BeautifulSoup
import time
import random
import csv
import codecs
import unicodecsv as csv
import json
import urllib.request as req
# Query-feed endpoint of the hk.appledaily.com content API, already
# percent-encoded. The query string carries:
#   query   - JSON object with a "feedQuery" listing 45 article _id values
#             and "feedSize" 45
#   filter  - the set of fields requested for each content element
#   d, _website - passed through unchanged (d=124, _website=hk-appledaily)
# The literal is written as adjacent string pieces inside parentheses so that
# it can span several source lines; Python joins them into a single string
# with no separators.
url = (
    'https://hk.appledaily.com/pf/api/v3/content/fetch/query-feed?query=%7B%22feedQuery%22%3A%22_id%3DBOYXWJJ3QVG2ZMR6SYQFRH4A6Y%2520LSBS6UUXMRD2FJGYLBIGMGBIKA%252055LGLUZT6VB35LLUWYIUJYKCIY%2520VRRZ756IDJEMLBAZ2ORNA5KJAE%2520CDVHR4KICBBX3LWHIPQ6RFCWUA%2520CNYVVX2NPREJ5KLDY7HEIWONYM%2520RLSZZQYRONECPJX5C3FVNWHLPA%2520DCTTQKKXJZAOFEAM456Y3GQNBM%25206PKBF5GGWRCNVFDW43W6IMNXLM%25205MB7VMECKJF3XPAWVJ2CU2XOWI%25207HV62OCQ3JGGTHQ4YO5SW4NC6U%2520RSASZJVKIVDKNIJOFTLLNKFTOQ%2520ERZ54VLP4RD6HODOYK23XSX4BQ%25203HP6BLFYARCZBGRMABLAD2PEBI%2520WJGBQLJWAVCEPN2AIV2FYPLEQQ%2520ZDDUTBLVCNGVFEU6M6YIEBKOE4%2520DY7FKN2X7FHGFPN6JZSPHFDJMA%2520LFQ7GT4JGBC5PJ2S33DTVVRDV4%2520PJUZJPZEYVEZ3K3A25DHL2OIBA%2520G4GRBTLMXZH7FKWWTJY5AVC4GY%2520LNWFP3KVQVFNPNFFB3TJM3HNPY%2520PUG6RU3Y5FGJ7LCUTIIA55DEMM%2520RL2BLPQI3BBLRLMVZRISBG6DWQ%2520XFOPN6WW4NDUZNY7LOBYCRWPMY%2520XBOZOCZGHFBARKBL2TNPDHY4W4%2520UI5VU44OJBHOTN5J7XT7SGDJDY%2520ZC5DFCGM2FCL3O6I4C4IDPBHL4%2520NIMQPG5M4FGHLH4JESCYJ5WINQ%2520UE5UX47OCNE7BNYFT4LON5QX4M%2520AGBXPPCMABHXNKQUJDF6X6KKCU%2520AGOSHON765HEBLR2TLI45HCPBY%2520K4CSA4YKU5GFZD3TD6BKOTZIYU%2520ZPJ4GHAMR5ANBLM47FPYTVA23E%2520M3RXLRWEEJB4PFPGOI3TONFYVU%2520UMVCXX557RE35JQPY7S4ZJABNU%25207BNQ2NFZYFH5PGOU7STECHYSUY%2520274LR45D5VB3DBC5RYMLXNQ2R4%2520LGANWEGOF5ANDFRXMEX5ZASSSU%2520M7SGUCRJNBB5JDW4443X44U2GY%2520TMFFYOXFYBCJXHKYL6OOGGAV2I%2520CCQT57A5NFGCHDNTULDYMCSAU4%25205QEGSYOPCJHWZEYCWECEQEPJFM%25202AJ74YZQCVHL3PXLCYA43NGZBE%2520OTNRR64V2NDNBD3IR5BAHZLIHQ%2520LJ4SFQ4RKFDP3AW3MIPGDXMEAU%22%2C%22feedSize%22%3A45%7D'
    '&filter=%7B_id%2Ccontent_elements%7B_id%2Ccanonical_url%2Ccreated_date%2Cdisplay_date%2Cheadlines%7Bbasic%7D%2Clast_updated_date%2Cpromo_items%7Bbasic%7B_id%2Ccaption%2Ccreated_date%2Cheight%2Clast_updated_date%2Cpromo_image%7Burl%7D%2Ctype%2Curl%2Cversion%2Cwidth%7D%2Ccanonical_website%2Ccredits%2Cdisplay_date%2Cfirst_publish_date%2Clocation%2Cpublish_date%2Crelated_content%2Csubtype%7D%2Crevision%2Csource%7Badditional_properties%2Cname%2Csource_id%2Csource_type%2Csystem%7D'
    '%2Ctaxonomy%7Bprimary_section%7B_id%2Cpath%7D%7D%2Ctype%2Cversion%2Cwebsite%2Cwebsite_url%7D%2Ccount%2Ctype%2Cversion%7D&d=124&_website=hk-appledaily'
)
# Build the request with a browser-like User-Agent header in place of
# urllib's default one.
request = req.Request(url, headers={
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
})

# Download the response body; the connection is closed as soon as the body
# has been read.
with req.urlopen(request) as response:
    data = response.read().decode("utf-8")

# The body is JSON; the list of posts lives under "content_elements".
data = json.loads(data)
posts = data["content_elements"]

# Print one "<last updated>-<headline>" line per post.
for post in posts:
    headline = post["headlines"]["basic"]
    # Apply the "T" -> "#" substitution to the looked-up value (presumably an
    # ISO-8601 timestamp - confirm against the API). Calling .replace() on
    # the key literal instead is a no-op and leaves the value untouched.
    updated = post["last_updated_date"].replace("T", "#")
    print(updated + "-" + headline)