I’ve the below code, that is reading all txt
files at the root directory, scan them for specific words basedon the given regex
and report the words in the regex
that are found in each file.
The issue I’ve is if a wod is mentioned more than ones (i.e. in different loctions in the file) it will be reported as much as it is appearing,I need to reort it as unique so excluding duplicates. for example in the txt
file:
I'm an engineer not a doctor
really, I'm not a doctor
The word doctor
is reported twice, while I need to get it as unique, ie. it is enough for me to know it is there in the file.
My code is:
package main
import (
"fmt"
"io/ioutil"
"log"
"path/filepath"
"regexp"
"strings"
)
func main() {
files, err := ioutil.ReadDir(".")
if err != nil {
log.Fatal(err)
}
p := []string{}
p = append(p, "engineer")
p = append(p, "doctor")
p = append(p, "chemical (permit)")
skills := strings.Join(p, "|")
fmt.Println(skills)
re := regexp.MustCompile(`(?i)` + skills)
for _, file := range files {
if strings.ToLower(filepath.Ext(file.Name())) == ".txt" {
fmt.Println(file.Name())
b, err := ioutil.ReadFile(file.Name()) // just pass the file name
if err != nil {
fmt.Print(err)
}
//fmt.Println(b) // print the content as 'bytes'
str := string(b) // convert content to a 'string'
matches := re.FindAllString(str, -1)
fmt.Println(matches, len(matches))
for _, j := range matches {
fmt.Println(j)
}
}
}
}