I want to find whole words in Greek text using Golang.
Using \b as the word boundary for the regular expression pattern
works when I use English, but not Greek.
When I run the following, one occurrence of joy
is found, which is correct.
But, for the Greek, nothing is returned.
If I remove the \b on each side of the Greek, two occurrences of ἀγαπη
are found.
func main() {
pattern := "\\bjoy\\b"
text := "sing joyfully with joy"
matcher, err := regexp.Compile(pattern)
if err != nil {
fmt.Println(err)
}
indexes := matcher.FindAllStringIndex(text, -1)
expect := 1
got := len(indexes)
if expect != got {
fmt.Printf("expected %d, got %d\n", expect, got)
}
pattern = "\\bἀγαπη\\b"
text = "Ὡς ἀγαπη τὰ σκηνώματά σου. Ὡς ἀγαπητὰ τὰ σκηνώματά σου."
matcher, err = regexp.Compile(pattern)
if err != nil {
fmt.Println(err)
}
indexes = matcher.FindAllStringIndex(text, -1)
expect = 1
got = len(indexes)
if expect != got {
fmt.Printf("expected %d, got %d\n", expect, got)
}
}