Merge branch 'refact/engine' into 'master'

refactor interfaces and names

See merge request raphael.cerveaux/go-search!2
master
raphael 2020-04-17 17:53:24 +00:00
commit 6835cf93fc
10 changed files with 62 additions and 55 deletions

View File

@ -29,17 +29,12 @@ var dumpCmd = &cobra.Command{
var results = make(map[string]map[string]int)
files, err := client.GetAllFiles()
files, err := client.GetFiles()
if err != nil {
log.Fatalf("error: %v", err)
}
for _, file := range files {
words, err := client.GetWordsFromFile(file)
if err != nil {
log.Fatalf("error: %v", err)
}
// Open File
f, err := ioutil.ReadFile(file)
if err != nil {
@ -47,12 +42,12 @@ var dumpCmd = &cobra.Command{
}
fileContent := string(f)
for _, word := range words {
if results[word] == nil {
results[word] = make(map[string]int)
for w, s := range engine.Scan(fileContent) {
if results[w] == nil {
results[w] = make(map[string]int)
}
results[word][file] = engine.CountWord(fileContent, word)
results[w][file] = s
}
}

View File

@ -30,14 +30,19 @@ var indexCmd = &cobra.Command{
fmt.Printf("Walking %v...\n", path)
for _, file := range engine.GetFilesFromDir(path) {
files, err := engine.ScanDir(path)
if err != nil {
panic(err)
}
for _, file := range files {
// Open File
f, err := ioutil.ReadFile(file)
if err != nil {
panic(err)
}
if !engine.IsTextFile(f) {
if !engine.IsText(f) {
continue
}

View File

@ -31,7 +31,7 @@ var queryCmd = &cobra.Command{
fmt.Printf("Querying index for \"%s\":\n\n", word)
files, err := client.GetAllFiles()
files, err := client.GetFiles()
if err != nil {
log.Fatal(err)
}

View File

@ -2,31 +2,26 @@ package engine
import (
"fmt"
"log"
"os"
"path/filepath"
)
// GetFilesFromDir walks through a directory and returns
// ScanDir walks through a directory and returns
// file paths contained inside.
func GetFilesFromDir(dir string) (files []string) {
err := filepath.Walk(dir, func(fp string, fi os.FileInfo, err error) error {
func ScanDir(dir string) (files []string, err error) {
err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
fmt.Println(err)
return nil
}
if fi.IsDir() {
if info.IsDir() {
return nil
}
files = append(files, fp)
files = append(files, path)
return nil
})
if err != nil {
log.Fatal(err)
}
return files
return files, err
}

View File

@ -30,10 +30,10 @@ func NewRedisClient(addr, port, password string, db int) (*RedisClient, error) {
// AddFile indexes a file
func (c *RedisClient) AddFile(file, content string) error {
for _, v := range GetWordsFromText(content) {
for w, s := range Scan(content) {
if err := c.conn.ZAdd(file, &redis.Z{
Score: float64(CountWord(content, v)),
Member: strings.ToLower(v),
Score: float64(s),
Member: strings.ToLower(w),
}).Err(); err != nil {
return err
}
@ -42,8 +42,8 @@ func (c *RedisClient) AddFile(file, content string) error {
return nil
}
// GetWordsFromFile search for a key
func (c *RedisClient) GetWordsFromFile(key string) ([]string, error) {
// Get returns the members of the sorted set stored at key, ordered by descending score
func (c *RedisClient) Get(key string) ([]string, error) {
return c.conn.ZRevRangeByScore(key, &redis.ZRangeBy{
Min: "-inf",
Max: "+inf",
@ -57,8 +57,8 @@ func (c *RedisClient) GetWordScoreFromFile(key, member string) float64 {
return c.conn.ZScore(key, member).Val()
}
// GetAllFiles returns a key value
func (c *RedisClient) GetAllFiles() (keys []string, err error) {
// GetFiles returns the keys yielded by a single SCAN call matching "*"
func (c *RedisClient) GetFiles() (keys []string, err error) {
keys, _, err = c.conn.Scan(0, "*", -1).Result()
return
}

View File

@ -57,7 +57,7 @@ func TestRedisClient(t *testing.T) {
}).Err()
assert.Equal(nil, err, "should be equal")
values, err2 := redisClient.GetWordsFromFile("word")
values, err2 := redisClient.Get("word")
assert.Equal(nil, err2, "should be equal")
assert.Equal([]string{"file"}, values, "should be equal")

View File

@ -11,22 +11,22 @@ func TestResults(t *testing.T) {
t.Run("SortResultsByScore", func(t *testing.T) {
results := []*QueryResult{
&QueryResult{
{
File: "/file",
Score: 1,
FirstMatch: "",
},
&QueryResult{
{
File: "/file",
Score: 3,
FirstMatch: "",
},
&QueryResult{
{
File: "/file",
Score: 9,
FirstMatch: "",
},
&QueryResult{
{
File: "/file",
Score: 2,
FirstMatch: "",
@ -34,22 +34,22 @@ func TestResults(t *testing.T) {
}
expected := []*QueryResult{
&QueryResult{
{
File: "/file",
Score: 9,
FirstMatch: "",
},
&QueryResult{
{
File: "/file",
Score: 3,
FirstMatch: "",
},
&QueryResult{
{
File: "/file",
Score: 2,
FirstMatch: "",
},
&QueryResult{
{
File: "/file",
Score: 1,
FirstMatch: "",

View File

@ -8,11 +8,14 @@ import (
"strings"
)
// CountWord returns the number of
// occurrences of a word in a given text.
func CountWord(text, word string) (count int) {
for _, v := range GetWordsFromText(text) {
if v == word {
// ScanHits is the type for words and their score
type ScanHits = map[string]int
func countWord(text, word string) (count int) {
scanner := bufio.NewScanner(bytes.NewBufferString(text))
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
if scanner.Text() == word {
count++
}
}
@ -20,22 +23,25 @@ func CountWord(text, word string) (count int) {
return
}
// GetWordsFromText returns an array of words
func GetWordsFromText(text string) (words []string) {
// Scan returns a map from each normalized word in text to its score
func Scan(text string) ScanHits {
words := make(map[string]int)
scanner := bufio.NewScanner(bytes.NewBufferString(text))
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
w := strings.ToLower(scanner.Text())
word := scanner.Text()
re, _ := regexp.Compile("[,|.|(|)|_]")
words = append(words, re.ReplaceAllString(w, ""))
transformedWord := re.ReplaceAllString(strings.ToLower(word), "")
words[transformedWord] = countWord(text, word)
}
return words
}
// IsTextFile returns whether a file is a
// IsText returns whether a file is a
// recognized text file or not.
func IsTextFile(file []byte) bool {
func IsText(file []byte) bool {
contentType := http.DetectContentType(file)
return strings.Contains(contentType, "text/plain")

View File

@ -12,19 +12,19 @@ func TestText(t *testing.T) {
t.Run("CountWord", func(t *testing.T) {
text := "Contrairement à une opinion répandue, le Lorem Ipsum n'est pas simplement du texte aléatoire.\nIl trouve ses racines dans une oeuvre de la littérature latine classique datant de 45 av. J.-C., le rendant vieux de 2000 ans.\n\nUn professeur du Hampden-Sydney College, en Virginie, s'est intéressé à un des mots latins les plus obscurs, consectetur, extrait d'un passage du Lorem Ipsum, et en étudiant tous les usages de ce mot dans la littérature classique, découvrit la source incontestable du Lorem Ipsum."
assert.Equal(3, CountWord(text, "la"), "should be equal")
assert.Equal(3, countWord(text, "la"), "should be equal")
})
t.Run("GetWordsFromText", func(t *testing.T) {
text := "Un professeur du Hampden-Sydney College, en Virginie, s'est intéressé à un des mots latins les plus obscurs, consectetur, extrait d'un passage du Lorem Ipsum."
assert.Equal([]string{"un", "professeur", "du", "hampden-sydney", "college", "en", "virginie", "s'est", "intéressé", "à", "un", "des", "mots", "latins", "les", "plus", "obscurs", "consectetur", "extrait", "d'un", "passage", "du", "lorem", "ipsum"}, GetWordsFromText(text), "should be equal")
assert.Equal(map[string]int{"college": 1, "consectetur": 1, "d'un": 1, "des": 1, "du": 2, "en": 1, "extrait": 1, "hampden-sydney": 1, "intéressé": 1, "ipsum": 1, "latins": 1, "les": 1, "lorem": 1, "mots": 1, "obscurs": 1, "passage": 1, "plus": 1, "professeur": 1, "s'est": 1, "un": 1, "virginie": 1, "à": 1}, Scan(text), "should be equal")
})
t.Run("IsTextFile", func(t *testing.T) {
assert.Equal(true, IsTextFile([]byte("Here is a string....")), "should be equal")
assert.Equal(true, IsText([]byte("Here is a string....")), "should be equal")
assert.Equal(false, IsTextFile([]byte("<html></html>")), "should be equal")
assert.Equal(false, IsText([]byte("<html></html>")), "should be equal")
})
t.Run("GetFirstMatchingLine", func(t *testing.T) {

6
go.sum
View File

@ -1,7 +1,9 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
@ -78,6 +80,7 @@ github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDf
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/common v0.4.0 h1:7etb9YClo3a6HjLzfl6rIQaU+FDfi0VSX39io3aQ+DM=
github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
@ -85,6 +88,7 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
@ -108,6 +112,7 @@ go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3 h1:XQyxROzUlZH+WIQwySDgnISgOivlhjIEwaQaJEJrrN0=
@ -145,6 +150,7 @@ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=