Tutorial: Processamento Automático de Faturas com OCR
Automatize a extração de dados de faturas usando AWS Textract, validação inteligente e integração com sistemas financeiros.
O Que Você Vai Construir
Sistema que: 1. Recebe faturas por email ou upload 2. Extrai dados com AWS Textract OCR 3. Valida e estrutura informações 4. Salva no ERP/banco de dados 5. Cria registros contábeis 6. Notifica equipe financeira
Tempo estimado: 40 minutos Nível: Intermediário Impacto: 90% menos tempo manual, zero erros de digitação
Pré-requisitos
- ✅ AWS Account (Textract)
- ✅ Email configurado ou S3
- ✅ PostgreSQL ou ERP
Parte 1: Estrutura
CREATE TABLE invoices (
id SERIAL PRIMARY KEY,
invoice_number VARCHAR(100),
supplier_name VARCHAR(255),
supplier_tax_id VARCHAR(50),
invoice_date DATE,
due_date DATE,
total_amount DECIMAL(12,2),
tax_amount DECIMAL(12,2),
currency VARCHAR(3) DEFAULT 'BRL',
-- OCR
file_url VARCHAR(500),
ocr_confidence DECIMAL(5,2),
raw_ocr_data JSONB,
-- Validação
validation_status VARCHAR(50) DEFAULT 'pending',
validation_errors JSONB,
-- Processamento
status VARCHAR(50) DEFAULT 'received',
processed_at TIMESTAMP,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE TABLE invoice_items (
id SERIAL PRIMARY KEY,
invoice_id INT REFERENCES invoices(id),
description TEXT,
quantity DECIMAL(10,2),
unit_price DECIMAL(12,2),
total_price DECIMAL(12,2)
);
Parte 2: Receber Fatura
// Webhook: Email com anexo ou Upload
// Transform: "Extrair Arquivo"
{{
{
"fileName": $trigger.body.attachments?.[0]?.filename || $trigger.body.fileName,
"fileUrl": $trigger.body.attachments?.[0]?.url || $trigger.body.fileUrl,
"fileType": $trigger.body.attachments?.[0]?.contentType || 'application/pdf',
"supplier": $trigger.body.from || $trigger.body.supplier
}
}}
Parte 3: OCR com AWS Textract
// HTTP Request: "Iniciar Análise Textract"
{
"method": "POST",
"url": "https://textract.us-east-1.amazonaws.com/",
"headers": {
"X-Amz-Target": "Textract.AnalyzeExpense",
"Content-Type": "application/x-amz-json-1.1"
},
"authentication": "aws",
"body": {
"Document": {
"S3Object": {
"Bucket": "{{ $vars.s3Bucket }}",
"Name": "{{ $nodes['extrair'].output.fileName }}"
}
}
}
}
// Function: "Processar Resultado Textract"
{{
(() => {
const result = $nodes['textract'].output;
const summaryFields = result.ExpenseDocuments?.[0]?.SummaryFields || [];
const extractField = (type) => {
const field = summaryFields.find(f => f.Type?.Text === type);
return {
value: field?.ValueDetection?.Text || null,
confidence: field?.ValueDetection?.Confidence || 0
};
};
return {
invoiceNumber: extractField('INVOICE_RECEIPT_ID'),
supplierName: extractField('VENDOR_NAME'),
taxId: extractField('TAX_PAYER_ID'),
invoiceDate: extractField('INVOICE_RECEIPT_DATE'),
dueDate: extractField('DUE_DATE'),
totalAmount: extractField('TOTAL'),
taxAmount: extractField('TAX'),
lineItems: (result.ExpenseDocuments?.[0]?.LineItemGroups?.[0]?.LineItems || []).map(item => ({
description: item.LineItemExpenseFields?.find(f => f.Type?.Text === 'ITEM')?.ValueDetection?.Text,
quantity: parseFloat(item.LineItemExpenseFields?.find(f => f.Type?.Text === 'QUANTITY')?.ValueDetection?.Text || 0),
unitPrice: parseFloat(item.LineItemExpenseFields?.find(f => f.Type?.Text === 'UNIT_PRICE')?.ValueDetection?.Text || 0),
totalPrice: parseFloat(item.LineItemExpenseFields?.find(f => f.Type?.Text === 'PRICE')?.ValueDetection?.Text || 0)
})),
rawData: result
};
})()
}}
Parte 4: Validação
// Function: "Validar Dados"
{{
(() => {
const data = $nodes['processar'].output;
const errors = [];
// Validações
if (!data.invoiceNumber.value) {
errors.push({ field: 'invoiceNumber', message: 'Número da nota não encontrado' });
}
if (data.invoiceNumber.confidence < 0.9) {
errors.push({ field: 'invoiceNumber', message: 'Baixa confiança no número', confidence: data.invoiceNumber.confidence });
}
if (!data.supplierName.value) {
errors.push({ field: 'supplierName', message: 'Nome do fornecedor não encontrado' });
}
const total = parseFloat(data.totalAmount.value?.replace(/[^\d.,]/g, '').replace(',', '.') || 0);
if (total === 0) {
errors.push({ field: 'totalAmount', message: 'Valor total inválido' });
}
// Validar soma dos itens
const itemsTotal = data.lineItems.reduce((sum, item) => sum + item.totalPrice, 0);
if (Math.abs(itemsTotal - total) > 0.01 && itemsTotal > 0) {
errors.push({
field: 'lineItems',
message: 'Soma dos itens não bate com total',
expected: total,
found: itemsTotal
});
}
const avgConfidence = [
data.invoiceNumber.confidence,
data.supplierName.confidence,
data.totalAmount.confidence
].reduce((a, b) => a + b, 0) / 3;
return {
isValid: errors.length === 0,
errors,
avgConfidence,
needsReview: errors.length > 0 || avgConfidence < 0.85
};
})()
}}
Parte 5: Salvar Dados
// Database Insert: "Salvar Fatura"
{
"table": "invoices",
"data": {
"invoice_number": "{{ $nodes['processar'].output.invoiceNumber.value }}",
"supplier_name": "{{ $nodes['processar'].output.supplierName.value }}",
"supplier_tax_id": "{{ $nodes['processar'].output.taxId.value }}",
"invoice_date": "{{ $nodes['processar'].output.invoiceDate.value }}",
"due_date": "{{ $nodes['processar'].output.dueDate.value }}",
"total_amount": "{{ parseFloat($nodes['processar'].output.totalAmount.value?.replace(/[^\\d.,]/g, '').replace(',', '.') || 0) }}",
"tax_amount": "{{ parseFloat($nodes['processar'].output.taxAmount.value?.replace(/[^\\d.,]/g, '').replace(',', '.') || 0) }}",
"file_url": "{{ $nodes['extrair'].output.fileUrl }}",
"ocr_confidence": "{{ $nodes['validar'].output.avgConfidence }}",
"raw_ocr_data": "{{ JSON.stringify($nodes['processar'].output.rawData) }}",
"validation_status": "{{ $nodes['validar'].output.isValid ? 'valid' : 'needs_review' }}",
"validation_errors": "{{ JSON.stringify($nodes['validar'].output.errors) }}",
"status": "{{ $nodes['validar'].output.needsReview ? 'pending_review' : 'approved' }}"
},
"returning": ["id"]
}
// Database Insert (Batch): "Salvar Itens"
{
"table": "invoice_items",
"data": "{{ $nodes['processar'].output.lineItems.map(item => ({ invoice_id: $nodes['salvar-fatura'].output.id, description: item.description, quantity: item.quantity, unit_price: item.unitPrice, total_price: item.totalPrice })) }}"
}
Parte 6: Notificações
// If/Else: "Precisa Revisão?"
{{ $nodes['validar'].output.needsReview }}
// Branch TRUE: Notificar equipe
// Slack Message
{
"channel": "#financeiro",
"text": "⚠️ Fatura precisa de revisão manual",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": `*Fatura #{{ $nodes['processar'].output.invoiceNumber.value }}* precisa de revisão\n\n*Fornecedor:* {{ $nodes['processar'].output.supplierName.value }}\n*Valor:* R$ {{ $nodes['processar'].output.totalAmount.value }}\n*Confiança:* {{ ($nodes['validar'].output.avgConfidence * 100).toFixed(0) }}%\n\n*Problemas encontrados:*\n{{ $nodes['validar'].output.errors.map(e => `• ${e.message}`).join('\n') }}`
},
"accessory": {
"type": "button",
"text": {
"type": "plain_text",
"text": "Revisar Fatura"
},
"url": "{{ $vars.appUrl }}/invoices/{{ $nodes['salvar-fatura'].output.id }}"
}
}
]
}
// Branch FALSE: Auto-aprovar
// Send Email
{
"to": "financeiro@empresa.com",
"subject": "✅ Nova fatura processada automaticamente",
"html": "<h2>Fatura #{{ $nodes['processar'].output.invoiceNumber.value }}</h2><p>Processada com sucesso e aprovada automaticamente.</p>"
}
Parte 7: Integração com ERP
// HTTP Request: "Criar no ERP"
{
"method": "POST",
"url": "{{ $vars.erpApiUrl }}/invoices",
"headers": {
"Authorization": "Bearer {{ $credentials.erp.apiKey }}",
"Content-Type": "application/json"
},
"body": {
"invoice_number": "{{ $nodes['processar'].output.invoiceNumber.value }}",
"supplier": {
"name": "{{ $nodes['processar'].output.supplierName.value }}",
"tax_id": "{{ $nodes['processar'].output.taxId.value }}"
},
"date": "{{ $nodes['processar'].output.invoiceDate.value }}",
"due_date": "{{ $nodes['processar'].output.dueDate.value }}",
"amount": "{{ $nodes['processar'].output.totalAmount.value }}",
"items": "{{ $nodes['processar'].output.lineItems }}"
}
}
Métricas
-- Taxa de aprovação automática
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE validation_status = 'valid') as auto_approved,
COUNT(*) FILTER (WHERE validation_status = 'needs_review') as needs_review,
ROUND(
COUNT(*) FILTER (WHERE validation_status = 'valid')::DECIMAL /
NULLIF(COUNT(*), 0) * 100,
2
) as auto_approval_rate,
AVG(ocr_confidence) as avg_confidence
FROM invoices
WHERE created_at >= NOW() - INTERVAL '30 days';
-- Economia de tempo
SELECT
COUNT(*) as invoices_processed,
COUNT(*) * 15 as minutes_saved, -- 15 min por fatura manual
ROUND(COUNT(*) * 15.0 / 60, 1) as hours_saved
FROM invoices
WHERE created_at >= NOW() - INTERVAL '30 days'
AND status = 'approved';
Próximos Passos
- Team Onboarding - Automatize processos
- Analytics Dashboard - Visualize métricas
- Slack Bot - Bot para aprovar faturas
Última atualização: Janeiro 2025