20
2020
09

Transforming to JSON data format

import lxml

from bs4 import BeautifulSoup

import time

import random

import csv

import codecs

import unicodecsv as csv

import json




import urllib.request as req


# Feed endpoint for the article list.  The query string is kept exactly as it
# was captured (already percent-encoded), only split into adjacent string
# literals so that no single literal spans a physical line break.
#   query  -> {"feedQuery": "_id=<45 article ids>", "feedSize": 45}
#             (the ids are separated by "%2520", a double-encoded space)
#   filter -> the field selection requested for each returned element
url = (
    'https://hk.appledaily.com/pf/api/v3/content/fetch/query-feed'
    '?query=%7B%22feedQuery%22%3A%22_id%3D'
    'BOYXWJJ3QVG2ZMR6SYQFRH4A6Y'
    '%2520LSBS6UUXMRD2FJGYLBIGMGBIKA'
    '%252055LGLUZT6VB35LLUWYIUJYKCIY'
    '%2520VRRZ756IDJEMLBAZ2ORNA5KJAE'
    '%2520CDVHR4KICBBX3LWHIPQ6RFCWUA'
    '%2520CNYVVX2NPREJ5KLDY7HEIWONYM'
    '%2520RLSZZQYRONECPJX5C3FVNWHLPA'
    '%2520DCTTQKKXJZAOFEAM456Y3GQNBM'
    '%25206PKBF5GGWRCNVFDW43W6IMNXLM'
    '%25205MB7VMECKJF3XPAWVJ2CU2XOWI'
    '%25207HV62OCQ3JGGTHQ4YO5SW4NC6U'
    '%2520RSASZJVKIVDKNIJOFTLLNKFTOQ'
    '%2520ERZ54VLP4RD6HODOYK23XSX4BQ'
    '%25203HP6BLFYARCZBGRMABLAD2PEBI'
    '%2520WJGBQLJWAVCEPN2AIV2FYPLEQQ'
    '%2520ZDDUTBLVCNGVFEU6M6YIEBKOE4'
    '%2520DY7FKN2X7FHGFPN6JZSPHFDJMA'
    '%2520LFQ7GT4JGBC5PJ2S33DTVVRDV4'
    '%2520PJUZJPZEYVEZ3K3A25DHL2OIBA'
    '%2520G4GRBTLMXZH7FKWWTJY5AVC4GY'
    '%2520LNWFP3KVQVFNPNFFB3TJM3HNPY'
    '%2520PUG6RU3Y5FGJ7LCUTIIA55DEMM'
    '%2520RL2BLPQI3BBLRLMVZRISBG6DWQ'
    '%2520XFOPN6WW4NDUZNY7LOBYCRWPMY'
    '%2520XBOZOCZGHFBARKBL2TNPDHY4W4'
    '%2520UI5VU44OJBHOTN5J7XT7SGDJDY'
    '%2520ZC5DFCGM2FCL3O6I4C4IDPBHL4'
    '%2520NIMQPG5M4FGHLH4JESCYJ5WINQ'
    '%2520UE5UX47OCNE7BNYFT4LON5QX4M'
    '%2520AGBXPPCMABHXNKQUJDF6X6KKCU'
    '%2520AGOSHON765HEBLR2TLI45HCPBY'
    '%2520K4CSA4YKU5GFZD3TD6BKOTZIYU'
    '%2520ZPJ4GHAMR5ANBLM47FPYTVA23E'
    '%2520M3RXLRWEEJB4PFPGOI3TONFYVU'
    '%2520UMVCXX557RE35JQPY7S4ZJABNU'
    '%25207BNQ2NFZYFH5PGOU7STECHYSUY'
    '%2520274LR45D5VB3DBC5RYMLXNQ2R4'
    '%2520LGANWEGOF5ANDFRXMEX5ZASSSU'
    '%2520M7SGUCRJNBB5JDW4443X44U2GY'
    '%2520TMFFYOXFYBCJXHKYL6OOGGAV2I'
    '%2520CCQT57A5NFGCHDNTULDYMCSAU4'
    '%25205QEGSYOPCJHWZEYCWECEQEPJFM'
    '%25202AJ74YZQCVHL3PXLCYA43NGZBE'
    '%2520OTNRR64V2NDNBD3IR5BAHZLIHQ'
    '%2520LJ4SFQ4RKFDP3AW3MIPGDXMEAU'
    '%22%2C%22feedSize%22%3A45%7D'
    '&filter=%7B_id%2Ccontent_elements%7B_id%2Ccanonical_url%2Ccreated_date'
    '%2Cdisplay_date%2Cheadlines%7Bbasic%7D%2Clast_updated_date'
    '%2Cpromo_items%7Bbasic%7B_id%2Ccaption%2Ccreated_date%2Cheight'
    '%2Clast_updated_date%2Cpromo_image%7Burl%7D%2Ctype%2Curl%2Cversion%2Cwidth%7D'
    '%2Ccanonical_website%2Ccredits%2Cdisplay_date%2Cfirst_publish_date'
    '%2Clocation%2Cpublish_date%2Crelated_content%2Csubtype%7D'
    '%2Crevision%2Csource%7Badditional_properties%2Cname%2Csource_id'
    '%2Csource_type%2Csystem%7D'
    '%2Ctaxonomy%7Bprimary_section%7B_id%2Cpath%7D%7D'
    '%2Ctype%2Cversion%2Cwebsite%2Cwebsite_url%7D'
    '%2Ccount%2Ctype%2Cversion%7D'
    '&d=124&_website=hk-appledaily'
)


# Build the request and attach a desktop-browser style User-Agent header.
request = req.Request(url)
request.add_header(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
)

# Download the response body, decode it as UTF-8 and parse it as JSON.
with req.urlopen(request) as response:
    data = json.loads(response.read().decode("utf-8"))


#print(data)


# Print one "<timestamp>-<headline>" line per feed item.
posts = data["content_elements"]

for post in posts:
    headline = post["headlines"]["basic"]
    # Apply the "T" -> "#" substitution to the timestamp VALUE.  It used to be
    # applied to the key name "last_updated_date", which contains no capital
    # "T", so the timestamp was printed unmodified.
    # NOTE(review): presumably an ISO-8601 string such as
    # "2020-09-20T08:15:00Z" -- confirm against the live feed.
    updated = post["last_updated_date"].replace("T", "#")
    print(f"{updated}-{headline}")


« 上一篇 下一篇 »

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。