Manually entering data from invoices can be a time-consuming task. The OCR API automates this process by swiftly extracting relevant information such as the invoice number, date, vendor details, and line items. This not only saves significant time but also allows you to concentrate on more critical tasks.
Moreover, automating invoice processing with the OCR API can reduce the necessity for manual data entry, subsequently lowering labor costs. It also mitigates the risk of human errors that could lead to financial losses.
The Python script below uses the OCR API to scan invoices. This tutorial focuses specifically on extracting the list of items to be paid from an invoice.
Do not forget to install the required packages: pip install requests
def parse_args():
    """Build the command line parser and return the parsed arguments."""
    cli = argparse.ArgumentParser()
    # Get your token at https://rapidapi.com/api4ai-api4ai-default/api/ocr43/pricing
    cli.add_argument('--api-key', required=True, help='Rapid API key.')
    cli.add_argument('pdf', help='Path to a pdf.', type=Path)
    parsed = cli.parse_args()
    return parsed
Send a request to the OCR API using requests. The response is returned so that it can be used later.
def parse_pdf(pdf: bytes, api_key: str) -> dict:
    """
    Send a PDF to the OCR API and return the decoded JSON response.

    Args:
        pdf: PDF content to upload. It is handed to requests unchanged as
            the ``image`` multipart field.
        api_key: RapidAPI key, sent in the ``X-RapidAPI-Key`` header.

    Returns:
        The JSON body of the response as a dict.

    Prints 'Image processing failed.' and exits the process with status 1
    when the HTTP status is not 200, when the body is not the expected JSON
    structure, or when the API reports the status code 'failure'.
    """
    # We strongly recommend you use exponential backoff.
    error_statuses = (408, 409, 429, 500, 502, 503, 504)
    retries = Retry(backoff_factor=1.5, status_forcelist=error_statuses)
    # NOTE(review): urllib3's Retry only retries a default set of idempotent
    # methods on status_forcelist, so this POST may not actually be retried
    # on the statuses above - confirm and widen allowed_methods if desired.

    # API_URL may be either the bare host or already the full endpoint;
    # normalise it so the endpoint path is never appended twice.
    endpoint = '/v1/results'
    base_url = API_URL.rstrip('/')
    url = base_url if base_url.endswith(endpoint) else base_url + endpoint

    # The context manager closes the session's connection pool on exit.
    with requests.Session() as session:
        session.mount('https://', HTTPAdapter(max_retries=retries))
        api_res = session.post(url, files={'image': pdf},
                               headers={'X-RapidAPI-Key': api_key},
                               timeout=20)

    # Handle processing failure. The HTTP status is checked before the body
    # is decoded because an error response is not guaranteed to be JSON.
    api_res_json = None
    failed = api_res.status_code != 200
    if not failed:
        try:
            api_res_json = api_res.json()
            status = api_res_json['results'][0]['status']['code']
        except (ValueError, KeyError, IndexError, TypeError):
            failed = True
        else:
            failed = status == 'failure'

    if failed:
        print('Image processing failed.')
        sys.exit(1)
    return api_res_json
The Invoice class represents a parsed invoice. It accepts a response from the OCR API.
This class is specialized for processing one particular type of invoice; a link to a sample is given below.
class Invoice:
    """
    Class representing invoice.

    Built from an OCR API response. Every recognised object is expected to
    carry a ``box`` whose first two values are used as the horizontal and
    vertical position (larger vertical values are treated as lower on the
    page) and an ``entities`` list whose first element holds the ``text``.

    Attributes:
        response: the raw OCR API response passed to the constructor.
        objects: the recognised objects taken from the response.
        items: list of ``[description, quantity, price, amount]`` rows.
        total: the invoice total as a string, or None when it is not found.
    """

    # Maximum vertical distance between two objects of the same text row.
    # A tolerance is needed in case the invoice is slightly rotated.
    _ROW_EPSILON = 0.01

    def __init__(self, ocr_api_response: dict):
        """
        Accept response from OCR API and parse it immediately.
        """
        self.response = ocr_api_response
        self.objects = self.response['results'][0]['entities'][0]['objects']
        self.items = None
        self.total = None
        self._parse_invoice()

    def _parse_invoice(self):
        """Parse parts of invoice."""
        self.items = self._parse_items()
        self.total = self._parse_total()

    def _parse_items(self):
        """
        Parse the list of items.

        Only objects located strictly between the first 'item' label and the
        first 'subtotal' label are considered. They are grouped into rows,
        and each row containing a '$' sign is split into
        ``[description, quantity, price, amount]``.

        Returns:
            Rows in top-to-bottom order. An empty list is returned when
            either of the two labels is missing.
        """
        headers = self._find_objects_by_text(self.objects, 'item')
        subtotals = self._find_objects_by_text(self.objects, 'subtotal')
        if not headers or not subtotals:
            return []

        top = headers[0]['box'][1]
        bottom = subtotals[0]['box'][1]
        between = [obj for obj in self.objects
                   if top < obj['box'][1] < bottom]

        items = []
        for row_objs in self._group_rows(between):
            # Read the words of a row from left to right.
            ordered = sorted(row_objs, key=lambda obj: obj['box'][0])
            row = [obj['entities'][0]['text'] for obj in ordered]
            if len(row) < 4:
                continue
            try:
                first_dollar = row.index('$')
            except ValueError:
                continue
            # Expected layout: <description...> <qty> $ <price> $ <amount>.
            # Skip rows where the quantity or the amount position would fall
            # outside the row instead of raising or wrapping around.
            if first_dollar < 1 or first_dollar + 3 >= len(row):
                continue
            items.append([' '.join(row[:first_dollar - 1]),
                          row[first_dollar - 1],
                          row[first_dollar + 1],
                          row[first_dollar + 3]])
        return items

    def _group_rows(self, objects):
        """
        Split objects into text rows ordered from top to bottom.

        An object joins the current row while its vertical position stays
        within the tolerance of the first object of that row. Rows are kept
        even when their text is identical, so repeated items are preserved.
        """
        rows = []
        anchor_y = None
        for obj in sorted(objects, key=lambda o: o['box'][1]):
            y = obj['box'][1]
            if anchor_y is None or not self._close_equal(
                    y, anchor_y, self._ROW_EPSILON):
                rows.append([])
                anchor_y = y
            rows[-1].append(obj)
        return rows

    def _parse_total(self):
        """
        Parse total invoice price.

        Returns:
            The text of the first all-digit object on the same row as the
            lowest 'total' label, or None when there is no such label or no
            such object.
        """
        # NOTE(review): str.isdigit() rejects values such as '154.06';
        # confirm how the OCR splits decimal amounts before relaxing this.
        labels = self._find_objects_by_text(self.objects, 'total')
        if not labels:
            return None
        # find the most bottom total object.
        total_y = max(labels, key=lambda obj: obj['box'][1])['box'][1]
        for obj in self.objects:
            text = obj['entities'][0]['text']
            if (self._close_equal(obj['box'][1], total_y, self._ROW_EPSILON)
                    and text.isdigit()):
                return text
        return None

    @staticmethod
    def _find_objects_by_text(objects, text):
        """
        Find all objects whose lowercased text equals the specified text.

        The text argument is compared as given, so it must be lowercase.
        """
        return [obj for obj in objects
                if obj['entities'][0]['text'].lower() == text]

    @staticmethod
    def _close_equal(num1, num2, epsilon):
        """Return True when two numbers differ by less than epsilon."""
        return abs(num1 - num2) < epsilon
"""
Parse invoice in PDF using OCR API.
Run script:
`python3 main.py --api-key <RAPID_API_KEY> <PATH_TO_PDF>`
"""
import argparse
import sys
from pathlib import Path
import requests
from requests.adapters import Retry, HTTPAdapter
# Base URL of the OCR API. Endpoint paths such as '/v1/results' are appended
# where the request is built, so the path must not be repeated here.
API_URL = 'https://ocr43.p.rapidapi.com'
def parse_args():
    """Build the command line parser and return the parsed arguments."""
    cli = argparse.ArgumentParser()
    # Get your token at https://rapidapi.com/api4ai-api4ai-default/api/ocr43/pricing
    cli.add_argument('--api-key', required=True, help='Rapid API key.')
    cli.add_argument('pdf', help='Path to a pdf.', type=Path)
    parsed = cli.parse_args()
    return parsed
def parse_pdf(pdf: bytes, api_key: str) -> dict:
    """
    Send a PDF to the OCR API and return the decoded JSON response.

    Args:
        pdf: PDF content to upload. It is handed to requests unchanged as
            the ``image`` multipart field.
        api_key: RapidAPI key, sent in the ``X-RapidAPI-Key`` header.

    Returns:
        The JSON body of the response as a dict.

    Prints 'Image processing failed.' and exits the process with status 1
    when the HTTP status is not 200, when the body is not the expected JSON
    structure, or when the API reports the status code 'failure'.
    """
    # We strongly recommend you use exponential backoff.
    error_statuses = (408, 409, 429, 500, 502, 503, 504)
    retries = Retry(backoff_factor=1.5, status_forcelist=error_statuses)
    # NOTE(review): urllib3's Retry only retries a default set of idempotent
    # methods on status_forcelist, so this POST may not actually be retried
    # on the statuses above - confirm and widen allowed_methods if desired.

    # API_URL may be either the bare host or already the full endpoint;
    # normalise it so the endpoint path is never appended twice.
    endpoint = '/v1/results'
    base_url = API_URL.rstrip('/')
    url = base_url if base_url.endswith(endpoint) else base_url + endpoint

    # The context manager closes the session's connection pool on exit.
    with requests.Session() as session:
        session.mount('https://', HTTPAdapter(max_retries=retries))
        api_res = session.post(url, files={'image': pdf},
                               headers={'X-RapidAPI-Key': api_key},
                               timeout=20)

    # Handle processing failure. The HTTP status is checked before the body
    # is decoded because an error response is not guaranteed to be JSON.
    api_res_json = None
    failed = api_res.status_code != 200
    if not failed:
        try:
            api_res_json = api_res.json()
            status = api_res_json['results'][0]['status']['code']
        except (ValueError, KeyError, IndexError, TypeError):
            failed = True
        else:
            failed = status == 'failure'

    if failed:
        print('Image processing failed.')
        sys.exit(1)
    return api_res_json
class Invoice:
    """
    Class representing invoice.

    Built from an OCR API response. Every recognised object is expected to
    carry a ``box`` whose first two values are used as the horizontal and
    vertical position (larger vertical values are treated as lower on the
    page) and an ``entities`` list whose first element holds the ``text``.

    Attributes:
        response: the raw OCR API response passed to the constructor.
        objects: the recognised objects taken from the response.
        items: list of ``[description, quantity, price, amount]`` rows.
        total: the invoice total as a string, or None when it is not found.
    """

    # Maximum vertical distance between two objects of the same text row.
    # A tolerance is needed in case the invoice is slightly rotated.
    _ROW_EPSILON = 0.01

    def __init__(self, ocr_api_response: dict):
        """
        Accept response from OCR API and parse it immediately.
        """
        self.response = ocr_api_response
        self.objects = self.response['results'][0]['entities'][0]['objects']
        self.items = None
        self.total = None
        self._parse_invoice()

    def _parse_invoice(self):
        """Parse parts of invoice."""
        self.items = self._parse_items()
        self.total = self._parse_total()

    def _parse_items(self):
        """
        Parse the list of items.

        Only objects located strictly between the first 'item' label and the
        first 'subtotal' label are considered. They are grouped into rows,
        and each row containing a '$' sign is split into
        ``[description, quantity, price, amount]``.

        Returns:
            Rows in top-to-bottom order. An empty list is returned when
            either of the two labels is missing.
        """
        headers = self._find_objects_by_text(self.objects, 'item')
        subtotals = self._find_objects_by_text(self.objects, 'subtotal')
        if not headers or not subtotals:
            return []

        top = headers[0]['box'][1]
        bottom = subtotals[0]['box'][1]
        between = [obj for obj in self.objects
                   if top < obj['box'][1] < bottom]

        items = []
        for row_objs in self._group_rows(between):
            # Read the words of a row from left to right.
            ordered = sorted(row_objs, key=lambda obj: obj['box'][0])
            row = [obj['entities'][0]['text'] for obj in ordered]
            if len(row) < 4:
                continue
            try:
                first_dollar = row.index('$')
            except ValueError:
                continue
            # Expected layout: <description...> <qty> $ <price> $ <amount>.
            # Skip rows where the quantity or the amount position would fall
            # outside the row instead of raising or wrapping around.
            if first_dollar < 1 or first_dollar + 3 >= len(row):
                continue
            items.append([' '.join(row[:first_dollar - 1]),
                          row[first_dollar - 1],
                          row[first_dollar + 1],
                          row[first_dollar + 3]])
        return items

    def _group_rows(self, objects):
        """
        Split objects into text rows ordered from top to bottom.

        An object joins the current row while its vertical position stays
        within the tolerance of the first object of that row. Rows are kept
        even when their text is identical, so repeated items are preserved.
        """
        rows = []
        anchor_y = None
        for obj in sorted(objects, key=lambda o: o['box'][1]):
            y = obj['box'][1]
            if anchor_y is None or not self._close_equal(
                    y, anchor_y, self._ROW_EPSILON):
                rows.append([])
                anchor_y = y
            rows[-1].append(obj)
        return rows

    def _parse_total(self):
        """
        Parse total invoice price.

        Returns:
            The text of the first all-digit object on the same row as the
            lowest 'total' label, or None when there is no such label or no
            such object.
        """
        # NOTE(review): str.isdigit() rejects values such as '154.06';
        # confirm how the OCR splits decimal amounts before relaxing this.
        labels = self._find_objects_by_text(self.objects, 'total')
        if not labels:
            return None
        # find the most bottom total object.
        total_y = max(labels, key=lambda obj: obj['box'][1])['box'][1]
        for obj in self.objects:
            text = obj['entities'][0]['text']
            if (self._close_equal(obj['box'][1], total_y, self._ROW_EPSILON)
                    and text.isdigit()):
                return text
        return None

    @staticmethod
    def _find_objects_by_text(objects, text):
        """
        Find all objects whose lowercased text equals the specified text.

        The text argument is compared as given, so it must be lowercase.
        """
        return [obj for obj in objects
                if obj['entities'][0]['text'].lower() == text]

    @staticmethod
    def _close_equal(num1, num2, epsilon):
        """Return True when two numbers differ by less than epsilon."""
        return abs(num1 - num2) < epsilon
def main():
    """
    Script entry function.

    Reads the command line, uploads the PDF to the OCR API, builds an
    Invoice from the response and prints its items and total.
    """
    cli_args = parse_args()
    pdf_path = cli_args.pdf
    # The open file object itself is handed to parse_pdf.
    with pdf_path.open('rb') as pdf_stream:
        ocr_response = parse_pdf(pdf_stream, cli_args.api_key)
    parsed_invoice = Invoice(ocr_response)
    print('Items: ', parsed_invoice.items)
    print('Total: ', parsed_invoice.total)


if __name__ == '__main__':
    main()
Download invoice.
Run the script: python3 main.py --api-key YOUR_API_KEY PATH/TO/INVOICE
You’ll see a result like this:
The OCR API provides a streamlined solution for implementing invoice parsing tasks. In essence, leveraging the OCR API for invoice scanning offers a multitude of advantages.
You have the option to refine the existing code independently or reach out to us at https://api4.ai/get-started for a customized solution tailored to your specific requirements.