References:
其實還有更快的方法:在 Google Drive 開啟圖片檔案時,按右鍵點選「選擇開啟工具」,再以「Google 文件」開啟;開啟後的文件會自動將圖片中的文字轉換為可編輯的文字,所以這份筆記其實白做了 >.<。
import os
import shutil
!pip install pytesseract
!sudo apt update
!sudo apt install tesseract-ocr
!sudo apt install libtesseract-dev
!pip install opencc-python-reimplemented;
!pip install Pillow==5.3.0
os.chdir("/usr/share/tesseract-ocr/4.00/tessdata/")
!wget https://github.com/tesseract-ocr/tessdata/raw/4.00/chi_tra.traineddata
!wget https://github.com/tesseract-ocr/tessdata/raw/4.00/chi_sim.traineddata
!wget https://github.com/tesseract-ocr/tessdata/raw/4.00/jpn.traineddata
#need to reset run time after install
# OCR step: mount Google Drive, read one image from it, run Tesseract on the
# image, and print the recognized text after passing it through OpenCC.
from google.colab import drive
from IPython.display import clear_output
from opencc import OpenCC
from PIL import Image
import pytesseract

# Image to recognize and the Tesseract language model to use.
# Other language codes used with this notebook: 'eng', 'chi_sim', 'chi_tra', 'jpn'.
IMAGE_PATH = '/content/drive/MyDrive/1.png'
OCR_LANG = 'chi_tra'

drive.mount('/content/drive/', force_remount=False)

# Clear the cell output produced so far (e.g. the mount messages).
clear_output()

# 's2t' is the OpenCC configuration name passed through unchanged from the
# original notebook (Simplified -> Traditional Chinese conversion).
cc = OpenCC('s2t')

# Open the image in a context manager so the underlying file handle is
# released as soon as OCR has finished instead of leaking until GC.
with Image.open(IMAGE_PATH) as img:
    text = pytesseract.image_to_string(img, lang=OCR_LANG)

print(cc.convert(text))