parent
c02f8d7a6c
commit
a5bd7af787
15
cmd/dump.go
15
cmd/dump.go
|
@ -29,17 +29,12 @@ var dumpCmd = &cobra.Command{
|
|||
|
||||
var results = make(map[string]map[string]int)
|
||||
|
||||
files, err := client.GetAllFiles()
|
||||
files, err := client.GetFiles()
|
||||
if err != nil {
|
||||
log.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
words, err := client.GetWordsFromFile(file)
|
||||
if err != nil {
|
||||
log.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
// Open File
|
||||
f, err := ioutil.ReadFile(file)
|
||||
if err != nil {
|
||||
|
@ -47,12 +42,12 @@ var dumpCmd = &cobra.Command{
|
|||
}
|
||||
fileContent := string(f)
|
||||
|
||||
for _, word := range words {
|
||||
if results[word] == nil {
|
||||
results[word] = make(map[string]int)
|
||||
for w, s := range engine.Scan(fileContent) {
|
||||
if results[w] == nil {
|
||||
results[w] = make(map[string]int)
|
||||
}
|
||||
|
||||
results[word][file] = engine.CountWord(fileContent, word)
|
||||
results[w][file] = s
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,14 +30,19 @@ var indexCmd = &cobra.Command{
|
|||
|
||||
fmt.Printf("Walking %v...\n", path)
|
||||
|
||||
for _, file := range engine.GetFilesFromDir(path) {
|
||||
files, err := engine.ScanDir(path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
// Open File
|
||||
f, err := ioutil.ReadFile(file)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if !engine.IsTextFile(f) {
|
||||
if !engine.IsText(f) {
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ var queryCmd = &cobra.Command{
|
|||
|
||||
fmt.Printf("Querying index for \"%s\":\n\n", word)
|
||||
|
||||
files, err := client.GetAllFiles()
|
||||
files, err := client.GetFiles()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -2,31 +2,26 @@ package engine
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetFilesFromDir walks through a directory and returns
|
||||
// ScanDir walks through a directory and returns
|
||||
// file paths contained inside.
|
||||
func GetFilesFromDir(dir string) (files []string) {
|
||||
err := filepath.Walk(dir, func(fp string, fi os.FileInfo, err error) error {
|
||||
func ScanDir(dir string) (files []string, err error) {
|
||||
err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return nil
|
||||
}
|
||||
if fi.IsDir() {
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
files = append(files, fp)
|
||||
files = append(files, path)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
return files
|
||||
return files, err
|
||||
}
|
||||
|
|
|
@ -30,10 +30,10 @@ func NewRedisClient(addr, port, password string, db int) (*RedisClient, error) {
|
|||
|
||||
// AddFile index a file
|
||||
func (c *RedisClient) AddFile(file, content string) error {
|
||||
for _, v := range GetWordsFromText(content) {
|
||||
for w, s := range Scan(content) {
|
||||
if err := c.conn.ZAdd(file, &redis.Z{
|
||||
Score: float64(CountWord(content, v)),
|
||||
Member: strings.ToLower(v),
|
||||
Score: float64(s),
|
||||
Member: strings.ToLower(w),
|
||||
}).Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -42,8 +42,8 @@ func (c *RedisClient) AddFile(file, content string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// GetWordsFromFile search for a key
|
||||
func (c *RedisClient) GetWordsFromFile(key string) ([]string, error) {
|
||||
// Get search for a key
|
||||
func (c *RedisClient) Get(key string) ([]string, error) {
|
||||
return c.conn.ZRevRangeByScore(key, &redis.ZRangeBy{
|
||||
Min: "-inf",
|
||||
Max: "+inf",
|
||||
|
@ -57,8 +57,8 @@ func (c *RedisClient) GetWordScoreFromFile(key, member string) float64 {
|
|||
return c.conn.ZScore(key, member).Val()
|
||||
}
|
||||
|
||||
// GetAllFiles returns a key value
|
||||
func (c *RedisClient) GetAllFiles() (keys []string, err error) {
|
||||
// GetFiles returns a key value
|
||||
func (c *RedisClient) GetFiles() (keys []string, err error) {
|
||||
keys, _, err = c.conn.Scan(0, "*", -1).Result()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ func TestRedisClient(t *testing.T) {
|
|||
}).Err()
|
||||
assert.Equal(nil, err, "should be equal")
|
||||
|
||||
values, err2 := redisClient.GetWordsFromFile("word")
|
||||
values, err2 := redisClient.Get("word")
|
||||
assert.Equal(nil, err2, "should be equal")
|
||||
|
||||
assert.Equal([]string{"file"}, values, "should be equal")
|
||||
|
|
|
@ -11,22 +11,22 @@ func TestResults(t *testing.T) {
|
|||
|
||||
t.Run("SortResultsByScore", func(t *testing.T) {
|
||||
results := []*QueryResult{
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 1,
|
||||
FirstMatch: "",
|
||||
},
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 3,
|
||||
FirstMatch: "",
|
||||
},
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 9,
|
||||
FirstMatch: "",
|
||||
},
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 2,
|
||||
FirstMatch: "",
|
||||
|
@ -34,22 +34,22 @@ func TestResults(t *testing.T) {
|
|||
}
|
||||
|
||||
expected := []*QueryResult{
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 9,
|
||||
FirstMatch: "",
|
||||
},
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 3,
|
||||
FirstMatch: "",
|
||||
},
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 2,
|
||||
FirstMatch: "",
|
||||
},
|
||||
&QueryResult{
|
||||
{
|
||||
File: "/file",
|
||||
Score: 1,
|
||||
FirstMatch: "",
|
||||
|
|
|
@ -8,11 +8,14 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
// CountWord returns the number of
|
||||
// occurence for a word in a given text.
|
||||
func CountWord(text, word string) (count int) {
|
||||
for _, v := range GetWordsFromText(text) {
|
||||
if v == word {
|
||||
// ScanHits is the type for words and their score
|
||||
type ScanHits = map[string]int
|
||||
|
||||
func countWord(text, word string) (count int) {
|
||||
scanner := bufio.NewScanner(bytes.NewBufferString(text))
|
||||
scanner.Split(bufio.ScanWords)
|
||||
for scanner.Scan() {
|
||||
if scanner.Text() == word {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
@ -20,22 +23,25 @@ func CountWord(text, word string) (count int) {
|
|||
return
|
||||
}
|
||||
|
||||
// GetWordsFromText returns an array of words
|
||||
func GetWordsFromText(text string) (words []string) {
|
||||
// Scan returns an array of words
|
||||
func Scan(text string) ScanHits {
|
||||
words := make(map[string]int)
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewBufferString(text))
|
||||
scanner.Split(bufio.ScanWords)
|
||||
for scanner.Scan() {
|
||||
w := strings.ToLower(scanner.Text())
|
||||
word := scanner.Text()
|
||||
re, _ := regexp.Compile("[,|.|(|)|_]")
|
||||
words = append(words, re.ReplaceAllString(w, ""))
|
||||
transformedWord := re.ReplaceAllString(strings.ToLower(word), "")
|
||||
words[transformedWord] = countWord(text, word)
|
||||
}
|
||||
|
||||
return words
|
||||
}
|
||||
|
||||
// IsTextFile returns whever a file is a
|
||||
// IsText returns whever a file is a
|
||||
// reconized text file or not.
|
||||
func IsTextFile(file []byte) bool {
|
||||
func IsText(file []byte) bool {
|
||||
contentType := http.DetectContentType(file)
|
||||
|
||||
return strings.Contains(contentType, "text/plain")
|
||||
|
|
|
@ -12,19 +12,19 @@ func TestText(t *testing.T) {
|
|||
t.Run("CountWord", func(t *testing.T) {
|
||||
text := "Contrairement à une opinion répandue, le Lorem Ipsum n'est pas simplement du texte aléatoire.\nIl trouve ses racines dans une oeuvre de la littérature latine classique datant de 45 av. J.-C., le rendant vieux de 2000 ans.\n\nUn professeur du Hampden-Sydney College, en Virginie, s'est intéressé à un des mots latins les plus obscurs, consectetur, extrait d'un passage du Lorem Ipsum, et en étudiant tous les usages de ce mot dans la littérature classique, découvrit la source incontestable du Lorem Ipsum."
|
||||
|
||||
assert.Equal(3, CountWord(text, "la"), "should be equal")
|
||||
assert.Equal(3, countWord(text, "la"), "should be equal")
|
||||
})
|
||||
|
||||
t.Run("GetWordsFromText", func(t *testing.T) {
|
||||
text := "Un professeur du Hampden-Sydney College, en Virginie, s'est intéressé à un des mots latins les plus obscurs, consectetur, extrait d'un passage du Lorem Ipsum."
|
||||
|
||||
assert.Equal([]string{"un", "professeur", "du", "hampden-sydney", "college", "en", "virginie", "s'est", "intéressé", "à", "un", "des", "mots", "latins", "les", "plus", "obscurs", "consectetur", "extrait", "d'un", "passage", "du", "lorem", "ipsum"}, GetWordsFromText(text), "should be equal")
|
||||
assert.Equal(map[string]int{"college": 1, "consectetur": 1, "d'un": 1, "des": 1, "du": 2, "en": 1, "extrait": 1, "hampden-sydney": 1, "intéressé": 1, "ipsum": 1, "latins": 1, "les": 1, "lorem": 1, "mots": 1, "obscurs": 1, "passage": 1, "plus": 1, "professeur": 1, "s'est": 1, "un": 1, "virginie": 1, "à": 1}, Scan(text), "should be equal")
|
||||
})
|
||||
|
||||
t.Run("IsTextFile", func(t *testing.T) {
|
||||
assert.Equal(true, IsTextFile([]byte("Here is a string....")), "should be equal")
|
||||
assert.Equal(true, IsText([]byte("Here is a string....")), "should be equal")
|
||||
|
||||
assert.Equal(false, IsTextFile([]byte("<html></html>")), "should be equal")
|
||||
assert.Equal(false, IsText([]byte("<html></html>")), "should be equal")
|
||||
})
|
||||
|
||||
t.Run("GetFirstMatchingLine", func(t *testing.T) {
|
||||
|
|
6
go.sum
6
go.sum
|
@ -1,7 +1,9 @@
|
|||
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
|
||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||
|
@ -78,6 +80,7 @@ github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDf
|
|||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
|
||||
github.com/prometheus/common v0.4.0 h1:7etb9YClo3a6HjLzfl6rIQaU+FDfi0VSX39io3aQ+DM=
|
||||
github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
||||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
||||
|
@ -85,6 +88,7 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T
|
|||
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||
github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo=
|
||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
|
||||
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
|
@ -108,6 +112,7 @@ go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
|||
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
|
||||
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3 h1:XQyxROzUlZH+WIQwySDgnISgOivlhjIEwaQaJEJrrN0=
|
||||
|
@ -145,6 +150,7 @@ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl
|
|||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
|
Loading…
Reference in New Issue