2022-01-06 17:13:14 +00:00
|
|
|
# Homebrew formula that builds the CWB 3 release tarball (or SVN trunk for
# --HEAD) with make, and smoke-tests the command-line tools and the C library.
class Cwb3 < Formula
  desc "Tools for managing and querying large text corpora with linguistic annotations"
  homepage "https://cwb.sourceforge.io/"
  url "https://downloads.sourceforge.net/project/cwb/cwb/cwb-3.5-RC/cwb-3.4.33-src.tar.gz"
  sha256 "856b72785522d42f13f4a0528d2b80c2bf422c10411234a8e4b61df111af77dd"
  license "GPL-2.0-or-later"
  revision 2
  head "svn://svn.code.sf.net/p/cwb/code/cwb/trunk"

  # Versions are read from the SourceForge RSS feed; the regex captures the
  # dotted version number from "cwb-<version>-src.t*" file names.
  livecheck do
    url "https://sourceforge.net/projects/cwb/rss?path=/cwb"
    regex(%r{url=.*?/cwb[._-]v?(\d+(?:\.\d+)+)-src\.t}i)
  end

  bottle do
    sha256 cellar: :any,                 arm64_monterey: "fce8071b9fe5b2c2bdd9b237ddac22d2df6b4fe2c08d29eb8bea456b7ab814d8"
    sha256 cellar: :any,                 arm64_big_sur:  "8f4fdd05b181e8f3c819c88d9cb7456b370a148ade914b5336ed7943c2725774"
    sha256 cellar: :any,                 monterey:       "157899904d915f2bc5dfe1cc7fd6e89899c91bd61c801c6854e831b811fde037"
    sha256 cellar: :any,                 big_sur:        "b8fec5b9f028d8f3ee3d98938bab86ac083ede05efe740504cd7abdb7ebc509e"
    sha256 cellar: :any,                 catalina:       "194365652e3e8e6478dfa0cc4c6e8fadf5d2191b253abc02d9e05ff0cbf7b792"
    sha256 cellar: :any_skip_relocation, x86_64_linux:   "574689b505bbb71eb7f626203b2d1e611208958ab5a9f320b805def38fe26a63"
  end

  depends_on "pkg-config" => :build
  depends_on "gettext"
  depends_on "glib"
  depends_on "pcre"
  depends_on "readline"

  uses_from_macos "bison" => :build
  uses_from_macos "flex" => :build
  uses_from_macos "ncurses"

  # Sample corpus data; staged only by the `test do` block below.
  resource("tutorial_data") do
    url "https://cwb.sourceforge.io/files/encoding_tutorial_data.zip"
    sha256 "bbd37514fdbdfd25133808afec6a11037fb28253e63446a9e548fb437cbdc6f0"
  end

  # Builds and installs CWB with make, then rewrites bin/cwb-config so it
  # refers to the opt prefixes of glib and pcre.
  def install
    # Make variables shared by the build and the install invocation.
    args = %W[
      PLATFORM=homebrew-formula
      SITE=homebrew-formula
      FULL_MESSAGES=1
      PREFIX=#{prefix}
      HOMEBREW_ROOT=#{HOMEBREW_PREFIX}
    ]

    system "make", "all", *args
    # Only the install step is run after deparallelizing; the build above is not.
    ENV.deparallelize
    system "make", "install", *args

    # Avoid rebuilds when dependencies are bumped.
    inreplace bin/"cwb-config" do |s|
      s.gsub! Formula["glib"].prefix.realpath, Formula["glib"].opt_prefix
      s.gsub! Formula["pcre"].prefix.realpath, Formula["pcre"].opt_prefix
    end
  end

  # Path of the default corpus registry under HOMEBREW_PREFIX.
  def default_registry
    HOMEBREW_PREFIX/"share/cwb/registry"
  end

  def post_install
    # make sure default registry exists
    default_registry.mkpath
  end

  # Tells the user where the default registry directory is.
  def caveats
    <<~STOP
      CWB default registry directory: #{default_registry}
    STOP
  end

  test do
    resource("tutorial_data").stage do
      Pathname("registry").mkdir
      Pathname("data").mkdir

      # Encode example.vrt into ./data and write its registry entry.
      system(bin/"cwb-encode", "-c", "ascii",
             "-d", "data", "-R", "registry/ex", "-f", "example.vrt",
             "-P", "pos", "-P", "lemma", "-S", "s:0")
      assert_predicate(Pathname("registry")/"ex", :exist?,
                       "registry file has been created")
      assert_predicate(Pathname("data")/"lemma.lexicon", :exist?,
                       "lexicon file for p-attribute lemma has been created")

      system(bin/"cwb-makeall", "-r", "registry", "EX")
      assert_predicate(Pathname("data")/"lemma.corpus.rev", :exist?,
                       "reverse index file for p-attribute lemma has been created")

      assert_equal("Tokens:\t5\nTypes:\t5\n",
                   shell_output("#{bin}/cwb-lexdecode -r registry -S EX"),
                   "correct token & type count for p-attribute")
      assert_equal("0\t4\n",
                   shell_output("#{bin}/cwb-s-decode -r registry EX -S s"),
                   "correct span for s-attribute")

      assert_equal("3\n",
                   shell_output("#{bin}/cqpcl -r registry -D EX 'A=[pos = \"\\w{2}\"]; size A;'"),
                   "CQP query works correctly")

      # Small C program compiled against the installed headers and library.
      # <stdio.h> is included explicitly because the program calls printf and
      # should not depend on cwb/cl.h pulling that header in.
      # Backslashes are doubled because this heredoc processes escapes.
      Pathname("test.c").write <<~STOP
        #include <stdio.h>
        #include <stdlib.h>
        #include <cwb/cl.h>

        int main(int argc, char *argv[]) {
          int *id, n_id, n_token;
          Corpus *C = cl_new_corpus("registry", "ex");
          Attribute *word = cl_new_attribute(C, "word", ATT_POS);
          id = cl_regex2id(word, "\\\\p{Ll}+", 0, &n_id);
          if (n_id > 0)
            n_token = cl_idlist2freq(word, id, n_id);
          else
            n_token = 0;
          printf("%d\\n", n_token);
          return 0;
        }
      STOP
      # Compiler and linker flags come from the installed cwb-config script.
      cppflags = Utils.safe_popen_read("#{bin}/cwb-config", "-I").strip.split
      ldflags = Utils.safe_popen_read("#{bin}/cwb-config", "-L").strip.split
      system ENV.cc, "-o", "test", *cppflags, "test.c", *ldflags
      assert_equal("3\n", shell_output("./test"),
                   "compiled test program works")
    end
  end
end
|