Pular para conteúdo

Tutorial: Processamento Automático de Faturas com OCR

Automatize a extração de dados de faturas usando AWS Textract, validação inteligente e integração com sistemas financeiros.

O Que Você Vai Construir

Sistema que: 1. Recebe faturas por email ou upload 2. Extrai dados com AWS Textract OCR 3. Valida e estrutura informações 4. Salva no ERP/banco de dados 5. Cria registros contábeis 6. Notifica equipe financeira

Tempo estimado: 40 minutos Nível: Intermediário Impacto: 90% menos tempo manual, zero erros de digitação

Pré-requisitos

  • ✅ AWS Account (Textract)
  • ✅ Email configurado ou S3
  • ✅ PostgreSQL ou ERP

Parte 1: Estrutura

CREATE TABLE invoices (
  id SERIAL PRIMARY KEY,
  invoice_number VARCHAR(100),
  supplier_name VARCHAR(255),
  supplier_tax_id VARCHAR(50),
  invoice_date DATE,
  due_date DATE,
  total_amount DECIMAL(12,2),
  tax_amount DECIMAL(12,2),
  currency VARCHAR(3) DEFAULT 'BRL',

  -- OCR
  file_url VARCHAR(500),
  ocr_confidence DECIMAL(5,2),
  raw_ocr_data JSONB,

  -- Validação
  validation_status VARCHAR(50) DEFAULT 'pending',
  validation_errors JSONB,

  -- Processamento
  status VARCHAR(50) DEFAULT 'received',
  processed_at TIMESTAMP,
  created_at TIMESTAMP DEFAULT NOW()
);

CREATE TABLE invoice_items (
  id SERIAL PRIMARY KEY,
  invoice_id INT REFERENCES invoices(id),
  description TEXT,
  quantity DECIMAL(10,2),
  unit_price DECIMAL(12,2),
  total_price DECIMAL(12,2)
);

Parte 2: Receber Fatura

// Webhook: Email com anexo ou Upload

// Transform: "Extrair Arquivo"
{{
  {
    "fileName": $trigger.body.attachments?.[0]?.filename || $trigger.body.fileName,
    "fileUrl": $trigger.body.attachments?.[0]?.url || $trigger.body.fileUrl,
    "fileType": $trigger.body.attachments?.[0]?.contentType || 'application/pdf',
    "supplier": $trigger.body.from || $trigger.body.supplier
  }
}}

Parte 3: OCR com AWS Textract

// HTTP Request: "Iniciar Análise Textract"
{
  "method": "POST",
  "url": "https://textract.us-east-1.amazonaws.com/",
  "headers": {
    "X-Amz-Target": "Textract.AnalyzeExpense",
    "Content-Type": "application/x-amz-json-1.1"
  },
  "authentication": "aws",
  "body": {
    "Document": {
      "S3Object": {
        "Bucket": "{{ $vars.s3Bucket }}",
        "Name": "{{ $nodes['extrair'].output.fileName }}"
      }
    }
  }
}
// Function: "Processar Resultado Textract"
{{
  (() => {
    const result = $nodes['textract'].output;
    const summaryFields = result.ExpenseDocuments?.[0]?.SummaryFields || [];

    const extractField = (type) => {
      const field = summaryFields.find(f => f.Type?.Text === type);
      return {
        value: field?.ValueDetection?.Text || null,
        confidence: field?.ValueDetection?.Confidence || 0
      };
    };

    return {
      invoiceNumber: extractField('INVOICE_RECEIPT_ID'),
      supplierName: extractField('VENDOR_NAME'),
      taxId: extractField('TAX_PAYER_ID'),
      invoiceDate: extractField('INVOICE_RECEIPT_DATE'),
      dueDate: extractField('DUE_DATE'),
      totalAmount: extractField('TOTAL'),
      taxAmount: extractField('TAX'),
      lineItems: (result.ExpenseDocuments?.[0]?.LineItemGroups?.[0]?.LineItems || []).map(item => ({
        description: item.LineItemExpenseFields?.find(f => f.Type?.Text === 'ITEM')?.ValueDetection?.Text,
        quantity: parseFloat(item.LineItemExpenseFields?.find(f => f.Type?.Text === 'QUANTITY')?.ValueDetection?.Text || 0),
        unitPrice: parseFloat(item.LineItemExpenseFields?.find(f => f.Type?.Text === 'UNIT_PRICE')?.ValueDetection?.Text || 0),
        totalPrice: parseFloat(item.LineItemExpenseFields?.find(f => f.Type?.Text === 'PRICE')?.ValueDetection?.Text || 0)
      })),
      rawData: result
    };
  })()
}}

Parte 4: Validação

// Function: "Validar Dados"
{{
  (() => {
    const data = $nodes['processar'].output;
    const errors = [];

    // Validações
    if (!data.invoiceNumber.value) {
      errors.push({ field: 'invoiceNumber', message: 'Número da nota não encontrado' });
    }

    if (data.invoiceNumber.confidence < 0.9) {
      errors.push({ field: 'invoiceNumber', message: 'Baixa confiança no número', confidence: data.invoiceNumber.confidence });
    }

    if (!data.supplierName.value) {
      errors.push({ field: 'supplierName', message: 'Nome do fornecedor não encontrado' });
    }

    const total = parseFloat(data.totalAmount.value?.replace(/[^\d.,]/g, '').replace(',', '.') || 0);
    if (total === 0) {
      errors.push({ field: 'totalAmount', message: 'Valor total inválido' });
    }

    // Validar soma dos itens
    const itemsTotal = data.lineItems.reduce((sum, item) => sum + item.totalPrice, 0);
    if (Math.abs(itemsTotal - total) > 0.01 && itemsTotal > 0) {
      errors.push({
        field: 'lineItems',
        message: 'Soma dos itens não bate com total',
        expected: total,
        found: itemsTotal
      });
    }

    const avgConfidence = [
      data.invoiceNumber.confidence,
      data.supplierName.confidence,
      data.totalAmount.confidence
    ].reduce((a, b) => a + b, 0) / 3;

    return {
      isValid: errors.length === 0,
      errors,
      avgConfidence,
      needsReview: errors.length > 0 || avgConfidence < 0.85
    };
  })()
}}

Parte 5: Salvar Dados

// Database Insert: "Salvar Fatura"
{
  "table": "invoices",
  "data": {
    "invoice_number": "{{ $nodes['processar'].output.invoiceNumber.value }}",
    "supplier_name": "{{ $nodes['processar'].output.supplierName.value }}",
    "supplier_tax_id": "{{ $nodes['processar'].output.taxId.value }}",
    "invoice_date": "{{ $nodes['processar'].output.invoiceDate.value }}",
    "due_date": "{{ $nodes['processar'].output.dueDate.value }}",
    "total_amount": "{{ parseFloat($nodes['processar'].output.totalAmount.value?.replace(/[^\\d.,]/g, '').replace(',', '.') || 0) }}",
    "tax_amount": "{{ parseFloat($nodes['processar'].output.taxAmount.value?.replace(/[^\\d.,]/g, '').replace(',', '.') || 0) }}",
    "file_url": "{{ $nodes['extrair'].output.fileUrl }}",
    "ocr_confidence": "{{ $nodes['validar'].output.avgConfidence }}",
    "raw_ocr_data": "{{ JSON.stringify($nodes['processar'].output.rawData) }}",
    "validation_status": "{{ $nodes['validar'].output.isValid ? 'valid' : 'needs_review' }}",
    "validation_errors": "{{ JSON.stringify($nodes['validar'].output.errors) }}",
    "status": "{{ $nodes['validar'].output.needsReview ? 'pending_review' : 'approved' }}"
  },
  "returning": ["id"]
}

// Database Insert (Batch): "Salvar Itens"
{
  "table": "invoice_items",
  "data": "{{ $nodes['processar'].output.lineItems.map(item => ({ invoice_id: $nodes['salvar-fatura'].output.id, description: item.description, quantity: item.quantity, unit_price: item.unitPrice, total_price: item.totalPrice })) }}"
}

Parte 6: Notificações

// If/Else: "Precisa Revisão?"
{{ $nodes['validar'].output.needsReview }}

// Branch TRUE: Notificar equipe

// Slack Message
{
  "channel": "#financeiro",
  "text": "⚠️ Fatura precisa de revisão manual",
  "blocks": [
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": `*Fatura #{{ $nodes['processar'].output.invoiceNumber.value }}* precisa de revisão\n\n*Fornecedor:* {{ $nodes['processar'].output.supplierName.value }}\n*Valor:* R$ {{ $nodes['processar'].output.totalAmount.value }}\n*Confiança:* {{ ($nodes['validar'].output.avgConfidence * 100).toFixed(0) }}%\n\n*Problemas encontrados:*\n{{ $nodes['validar'].output.errors.map(e => ` ${e.message}`).join('\n') }}`
      },
      "accessory": {
        "type": "button",
        "text": {
          "type": "plain_text",
          "text": "Revisar Fatura"
        },
        "url": "{{ $vars.appUrl }}/invoices/{{ $nodes['salvar-fatura'].output.id }}"
      }
    }
  ]
}

// Branch FALSE: Auto-aprovar

// Send Email
{
  "to": "financeiro@empresa.com",
  "subject": "✅ Nova fatura processada automaticamente",
  "html": "<h2>Fatura #{{ $nodes['processar'].output.invoiceNumber.value }}</h2><p>Processada com sucesso e aprovada automaticamente.</p>"
}

Parte 7: Integração com ERP

// HTTP Request: "Criar no ERP"
{
  "method": "POST",
  "url": "{{ $vars.erpApiUrl }}/invoices",
  "headers": {
    "Authorization": "Bearer {{ $credentials.erp.apiKey }}",
    "Content-Type": "application/json"
  },
  "body": {
    "invoice_number": "{{ $nodes['processar'].output.invoiceNumber.value }}",
    "supplier": {
      "name": "{{ $nodes['processar'].output.supplierName.value }}",
      "tax_id": "{{ $nodes['processar'].output.taxId.value }}"
    },
    "date": "{{ $nodes['processar'].output.invoiceDate.value }}",
    "due_date": "{{ $nodes['processar'].output.dueDate.value }}",
    "amount": "{{ $nodes['processar'].output.totalAmount.value }}",
    "items": "{{ $nodes['processar'].output.lineItems }}"
  }
}

Métricas

-- Taxa de aprovação automática
SELECT
  COUNT(*) as total,
  COUNT(*) FILTER (WHERE validation_status = 'valid') as auto_approved,
  COUNT(*) FILTER (WHERE validation_status = 'needs_review') as needs_review,
  ROUND(
    COUNT(*) FILTER (WHERE validation_status = 'valid')::DECIMAL /
    NULLIF(COUNT(*), 0) * 100,
    2
  ) as auto_approval_rate,
  AVG(ocr_confidence) as avg_confidence
FROM invoices
WHERE created_at >= NOW() - INTERVAL '30 days';

-- Economia de tempo
SELECT
  COUNT(*) as invoices_processed,
  COUNT(*) * 15 as minutes_saved, -- 15 min por fatura manual
  ROUND(COUNT(*) * 15.0 / 60, 1) as hours_saved
FROM invoices
WHERE created_at >= NOW() - INTERVAL '30 days'
  AND status = 'approved';

Próximos Passos

  1. Team Onboarding - Automatize processos
  2. Analytics Dashboard - Visualize métricas
  3. Slack Bot - Bot para aprovar faturas

Última atualização: Janeiro 2025