Azure Computer Vision APIでテキスト抽出(Read API)やーる(Python3.6)
2020年11月4日
|In Azure
はじめに
画像からテキスト抽出をやってみました
開発環境
- Windows 10
- Anaconda
- Python 3.6
- OpenCV 4.4.0
- Azure Computer Vision API
- Computer Vision クライアント ライブラリ(必要なら)
導入
1.Azureポータルにログインします
2.Computer Vision APIのリソースを作成します
4.必要なライブラリをインストールしてください。
1 2 3 4 |
pip install matplotlib pip install pillow pip install opencv-python pip install --upgrade azure-cognitiveservices-vision-computervision |
5.メモしたキーとエンドポイントを入力し、次のコードを実行してみましょう!
1 2 |
subscription_key = "<your subscription key>" endpoint = "<your API endpoint>" |
エンドポイントはリージョン(場所)指定でも動くようです。
1 |
endpoint = "https://<your region>.api.cognitive.microsoft.com/" |
画像URLからテキスト抽出する
クイック スタート:Computer Vision の REST API と Python を使用して印刷されたテキストと手書きテキストを抽出する
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import json import os import os.path import sys import requests import time import matplotlib.pyplot as plt from matplotlib.patches import Polygon from PIL import Image from io import BytesIO # import cv2 subscription_key = "" endpoint = "" # endpoint = "https://japanwest.api.cognitive.microsoft.com/" text_recognition_url = endpoint + "vision/v3.1/read/analyze" image_url = "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/master/articles/cognitive-services/Computer-vision/Images/readsample.jpg" headers = {'Ocp-Apim-Subscription-Key': subscription_key} data = {'url': image_url} response = requests.post(text_recognition_url, headers=headers, json=data) response.raise_for_status() operation_url = response.headers["Operation-Location"] analysis = {} poll = True while (poll): response_final = requests.get(response.headers["Operation-Location"], headers=headers) analysis = response_final.json() print(json.dumps(analysis, indent=4)) time.sleep(1) if ("analyzeResult" in analysis): poll = False if ("status" in analysis and analysis['status'] == 'failed'): poll = False polygons = [] if ("analyzeResult" in analysis): polygons = [(line["boundingBox"], line["text"]) for line in analysis["analyzeResult"]["readResults"][0]["lines"]] image = Image.open(BytesIO(requests.get(image_url).content)) ax = plt.imshow(image) for polygon in polygons: vertices = [(polygon[0][i], polygon[0][i+1]) for i in range(0, len(polygon[0]), 2)] text = polygon[1] patch = Polygon(vertices, closed=True, fill=False, linewidth=2, color='y') ax.axes.add_patch(patch) plt.text(vertices[0][0], vertices[0][1], text, fontsize=20, va="top") plt.show() |
input | output |
---|---|
![]() |
![]() |
ローカル画像からテキスト抽出する
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import json import os import os.path import sys import requests import time import matplotlib.pyplot as plt from matplotlib.patches import Polygon from PIL import Image from io import BytesIO import cv2 subscription_key = "" endpoint = "" # endpoint = "https://japanwest.api.cognitive.microsoft.com/" text_recognition_url = endpoint + "vision/v3.1/read/analyze" headers = {'Ocp-Apim-Subscription-Key': subscription_key, 'Content-Type': 'application/octet-stream'} filename = "readsample.jpg" root, ext = os.path.splitext(filename) # image_data = open(filename, "rb").read() color = cv2.imread(filename, cv2.IMREAD_COLOR) cv2.namedWindow("color", cv2.WINDOW_NORMAL) cv2.imshow("color", color) cv2.waitKey(1) image_data = cv2.imencode(ext, color)[1].tostring() response = requests.post(text_recognition_url, headers=headers, data=image_data) response.raise_for_status() operation_url = response.headers["Operation-Location"] analysis = {} poll = True while (poll): response_final = requests.get( response.headers["Operation-Location"], headers=headers) analysis = response_final.json() print(json.dumps(analysis, indent=4)) time.sleep(1) if ("analyzeResult" in analysis): poll = False if ("status" in analysis and analysis['status'] == 'failed'): poll = False polygons = [] if ("analyzeResult" in analysis): polygons = [(line["boundingBox"], line["text"]) for line in analysis["analyzeResult"]["readResults"][0]["lines"]] # image = Image.open(BytesIO(image_data)) image = Image.fromarray(color) ax = plt.imshow(image) for polygon in polygons: vertices = [(polygon[0][i], polygon[0][i+1]) for i in range(0, len(polygon[0]), 2)] text = polygon[1] patch = Polygon(vertices, closed=True, fill=False, linewidth=2, color='y') ax.axes.add_patch(patch) plt.text(vertices[0][0], vertices[0][1], text, fontsize=20, va="top") plt.show() |
input | output |
---|---|
![]() |
![]() |
Computer Vision クライアント ライブラリを使用する
クイック スタート:Computer Vision クライアント ライブラリを使用する
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
from azure.cognitiveservices.vision.computervision import ComputerVisionClient from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes from msrest.authentication import CognitiveServicesCredentials from array import array import os from PIL import Image import sys import time import cv2 from io import BytesIO subscription_key = "" endpoint = "" # endpoint = "https://japanwest.api.cognitive.microsoft.com/" computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key)) print("===== Batch Read File - remote =====") remote_image_handw_text_url = "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/master/articles/cognitive-services/Computer-vision/Images/readsample.jpg" recognize_handw_results = computervision_client.read(remote_image_handw_text_url, raw=True) operation_location_remote = recognize_handw_results.headers["Operation-Location"] operation_id = operation_location_remote.split("/")[-1] while True: get_handw_text_results = computervision_client.get_read_result(operation_id) if get_handw_text_results.status not in ['notStarted', 'running']: break time.sleep(1) if get_handw_text_results.status == OperationStatusCodes.succeeded: for text_result in get_handw_text_results.analyze_result.read_results: for line in text_result.lines: print(line.text) print(line.bounding_box) print() |
1 2 3 4 5 6 7 |
===== Batch Read File - remote ===== The quick brown fox jumps [38.0, 650.0, 2572.0, 699.0, 2570.0, 854.0, 37.0, 815.0] over [184.0, 1053.0, 508.0, 1044.0, 510.0, 1123.0, 184.0, 1128.0] the lazy dog! [639.0, 1011.0, 1976.0, 1026.0, 1974.0, 1158.0, 637.0, 1141.0] |
お疲れ様でした。
あわせて読みたい