22
2021
09

BV list: PDF to Excel

!pip install tabula-py



import tabula

import pandas as pd

from google.colab import drive

from google.colab import files

from google.colab import auth

# Authenticate the current Colab user through google.colab.auth before any
# Drive access below.
auth.authenticate_user()

from oauth2client.client import GoogleCredentials

import re

from google.colab import files

# Mount Google Drive; the converted workbook is later written under
# /content/drive/MyDrive.
drive.mount('/content/drive', force_remount=True)

# Drive mount point. A `global` declaration at module level has no effect,
# so the name is simply assigned.
path = "/content/drive"

# Ask the user to pick the PDF. The returned mapping is keyed by the
# uploaded file names.
uploaded = files.upload()

if not uploaded:
    # Without this guard next(iter(...)) dies with a bare StopIteration that
    # carries no hint about what went wrong.
    raise ValueError("No file was uploaded; a PDF file is required.")

# Only the first uploaded file is processed.
filename = next(iter(uploaded))

# Extract the tables from every page of the uploaded PDF; the result is
# concatenated with pd.concat further down, so it is treated as a collection
# of DataFrames.
df = tabula.read_pdf("/content/%s" % filename, pages='all')


# Stack all extracted tables into a single pandas DataFrame.
result = pd.concat(df)

# Disabled alternative, kept for reference: read column F of a previously
# exported workbook and turn it into a list.
#read_excel=pd.read_excel("/content/drive/MyDrive/uploaded/%s.xlsx"%filename.split(".")[0], usecols=[5], names=None)
#read_excel.values.tolist()

# Raw cell values of the "Component" column.
desc = result["Component"].values.tolist()

# Fixed text removed from every cell before classification (presumably table
# header text that leaks into the column -- TODO confirm).
_HEADER_NOISE = "CPSIA lead in surface coating\rCCPSA heavy metal in surface\rcoating"

# Missing cells become "" and everything else is stringified. Testing with
# pd.isna instead of deleting the substring "nan" keeps words that merely
# contain those letters (e.g. "banana", "nano") intact.
data = ["" if pd.isna(i) else str(i).replace(_HEADER_NOISE, "") for i in desc]

# Keep a description when "coating" appears after whitespace, else "".
# Raw strings avoid the invalid "\s" escape in a normal string literal.
cc = [f if re.match(r'.*\s+coating', f) else "" for f in data]
print(cc)

# Keep a description when "plastic"/"Plastic" or "PVC" appears after
# whitespace. The character class is [Pp]; the former [P|p] also accepted a
# literal "|" in front of "lastic".
dd = [d if re.match(r'.*\s+(?:[Pp]lastic|PVC)', d) else "" for d in data]
print(dd)

# Per-row concatenation of the two classifications; rows that matched
# neither pattern stay "".
e = [coating + plastic for coating, plastic in zip(cc, dd)]
print(e)



def get_indexes(list, element):
    """Return the positions whose item equals or contains ``element``.

    Args:
        list: Sequence of items to scan (strings in this script).
        element: Value to look for, either as the whole item or inside it.

    Returns:
        A new list with the matching positions in ascending order.
    """
    matches = []
    for position, item in enumerate(list):
        if item == element or element in item:
            matches.append(position)
    return matches


def replace(index_list,list_tbm,string):
    """Overwrite the entries of ``list_tbm`` at the given positions.

    Args:
        index_list: Positions in ``list_tbm`` to overwrite.
        list_tbm: List that is modified in place.
        string: Value stored at every listed position.

    Returns:
        The same ``list_tbm`` object, after modification.
    """
    for position in index_list:
        list_tbm[position] = string
    return list_tbm



# Collapse each classified description in ``e`` to a short material label.
# The passes run in this fixed order: an entry that has already been
# relabelled is only touched again if its new label contains a later keyword.
for keyword, label in (
    ("plastic", "plastic"),
    ("Plastic", "plastic"),
    ("PVC", "PVC plastic"),
    ("coating", "coating"),
):
    index_list = get_indexes(e, keyword)
    replace(index_list, e, label)

# Column "T": the cleaned description with the material words removed.
# The former trailing `if "PVC" or "plastic" or ...` was a chain of non-empty
# string literals, hence always true; it filtered nothing and has been
# dropped, so there is still exactly one value per row.
result["T"] = [
    aa.replace(' PVC', '').replace(' plastic', '').replace(' coating', '').replace(' Plastic', '')
    for aa in data
]

# Column "S": the material label computed above.
result["S"] = e

# Write the workbook next to the other uploads on Drive, named after the
# part of the uploaded file name before its first dot.
result.to_excel('/content/drive/MyDrive/uploaded/%s_use.xlsx' % filename.split(".")[0])

« 上一篇 下一篇 »

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。