92 lines
3.5 KiB
Ruby
92 lines
3.5 KiB
Ruby
# Homebrew formula that builds the Tesseract OCR engine from source and
# ships it together with the "eng", "osd" and "snum" trained-data files.
class Tesseract < Formula
  desc "OCR (Optical Character Recognition) engine"
  homepage "https://github.com/tesseract-ocr/"
  url "https://github.com/tesseract-ocr/tesseract/archive/5.1.0.tar.gz"
  sha256 "fdec8528d5a0ecc28ab5fff985e0b8ced60726f6ef33f54126f2868e323d4bd2"
  license "Apache-2.0"
  head "https://github.com/tesseract-ocr/tesseract.git", branch: "main"

  # Version detection: match tags made only of dotted numbers, with an
  # optional leading "v".
  livecheck do
    url :stable
    regex(/^v?(\d+(?:\.\d+)+)$/i)
  end

  bottle do
    sha256 cellar: :any,                 arm64_monterey: "9ccac58d048015f4a413b77e101efb3dd887705013c8171cff4f1de291dd56d3"
    sha256 cellar: :any,                 arm64_big_sur:  "84bd6cc90719599508c9519f51dbf991f6ce55fbdfa4ad3c1dce6af1f62759e2"
    sha256 cellar: :any,                 monterey:       "8e95648b19daa92b54b9da27c7fdff4b1073395abb0e723117b1ff7766b16f02"
    sha256 cellar: :any,                 big_sur:        "68816323f1e265054a84cdb323026756e9402840cb2ccdcbe91ca19089cc77c5"
    sha256 cellar: :any,                 catalina:       "4410c76a07b5dfb8b5af20ee8b2622d29a0ffeed1d41141ef7722d1b2365f6c8"
    sha256 cellar: :any_skip_relocation, x86_64_linux:   "8bc3bcc900da9ac8f0dfa3f7223e809b6b68c9d481aa48c62b35c1de70407d95"
  end

  # Build-only toolchain: ./autogen.sh is run during install.
  depends_on "autoconf" => :build
  depends_on "automake" => :build
  depends_on "libtool" => :build
  depends_on "pkg-config" => :build
  depends_on "leptonica"
  depends_on "libarchive"

  # Linux builds additionally depend on the gcc formula.
  on_linux do
    depends_on "gcc"
  end

  fails_with gcc: "5"

  # Trained-data files that install moves into share/"tessdata".
  resource "eng" do
    url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/eng.traineddata"
    sha256 "7d4322bd2a7749724879683fc3912cb542f19906c83bcc1a52132556427170b2"
  end

  resource "osd" do
    url "https://github.com/tesseract-ocr/tessdata_fast/raw/4.1.0/osd.traineddata"
    sha256 "9cf5d576fcc47564f11265841e5ca839001e7e6f38ff7f7aacf46d15a96b00ff"
  end

  resource "snum" do
    url "https://github.com/USCDataScience/counterfeit-electronics-tesseract/raw/319a6eeacff181dad5c02f3e7a3aff804eaadeca/Training%20Tesseract/snum.traineddata"
    sha256 "36f772980ff17c66a767f584a0d80bf2302a1afa585c01a226c1863afcea1392"
  end

  # Sample image consumed by the `test do` block only.
  resource "test_resource" do
    url "https://raw.githubusercontent.com/tesseract-ocr/test/6dd816cdaf3e76153271daf773e562e24c928bf5/testing/eurotext.tif"
    sha256 "7b9bd14aba7d5e30df686fbb6f71782a97f48f81b32dc201a1b75afe6de747d6"
  end

  # Generates the build system, configures, compiles and installs Tesseract,
  # then places the bundled trained-data files under share/"tessdata".
  def install
    # The makefile looks for the leptonica headers in /usr/local/include by
    # default, which is wrong when Homebrew lives under a different prefix,
    # so the header directory is spelled out explicitly.
    ENV["LIBLEPT_HEADERSDIR"] = HOMEBREW_PREFIX/"include"

    ENV.cxx11

    system "./autogen.sh"

    configure_args = %W[
      --prefix=#{prefix}
      --disable-dependency-tracking
      --datarootdir=#{HOMEBREW_PREFIX}/share
    ]
    system "./configure", *configure_args

    system "make"

    # configure was given the shared Homebrew data root; the install step is
    # redirected to this keg's own share folder to avoid permission errors.
    system "make", "install", "datarootdir=#{share}"

    # Stage each trained-data resource and move its single file into place.
    %w[snum eng osd].each do |lang|
      resource(lang).stage { mv "#{lang}.traineddata", share/"tessdata" }
    end
  end

  # Post-install note listing the bundled languages and where to get more.
  def caveats
    <<~EOS
      This formula contains only the "eng", "osd", and "snum" language data files.
      If you need any other supported languages, run `brew install tesseract-lang`.
    EOS
  end

  # Runs OCR on the sample image and checks for a known line in the output.
  test do
    resource("test_resource").stage do
      system bin/"tesseract", "./eurotext.tif", "./output", "-l", "eng"

      # tesseract was given "./output" as its output base; read "output.txt".
      recognized_text = File.read("output.txt")
      assert_match "The (quick) [brown] {fox} jumps!\n", recognized_text
    end
  end
end
|