I solved it as shown in the following playground example:
package main
import (
"errors"
"fmt"
"regexp"
"strings"
)
// stopWordRE matches a word that consists solely of one of the excluded
// words ("this", "is", "a"), ignoring case. It is anchored at both ends so
// that words which merely start with a stop word (e.g. "another", "island")
// are not excluded.
var stopWordRE = regexp.MustCompile(`(?i)^(?:this|is|a)$`)

// build compiles a case-insensitive regular expression from words.
//
// The first alternative of the expression is the named group "fullMatch",
// which matches all words in order, separated by single spaces. When more
// than one word is given, every word that is not a stop word is appended as
// a further alternative in its own capture group, so partial matches can
// still be found when the full phrase is absent. Every word is escaped with
// regexp.QuoteMeta and therefore matches literally.
//
// build returns an error if no words are given, or if the assembled
// expression fails to compile.
//
// See also:
// https://golangexample.com/match-regex-group-into-go-struct-using-struct-tags-and-automatic-parsing/
func build(words ...string) (*regexp.Regexp, error) {
	if len(words) == 0 {
		return nil, errors.New("empty input")
	}

	quoted := make([]string, len(words))
	for i, w := range words {
		quoted[i] = regexp.QuoteMeta(w)
	}

	var sb strings.Builder
	sb.WriteString(`(?i)(?P<fullMatch>`)
	sb.WriteString(strings.Join(quoted, "\x20"))
	sb.WriteByte(')')

	// With a single word the per-word alternative would only repeat the
	// full phrase, so it is skipped.
	if len(words) > 1 {
		for i, w := range words {
			// Test the raw word, not its escaped form, against the stop list.
			if stopWordRE.MatchString(w) {
				continue
			}
			fmt.Fprintf(&sb, "|(%s)", quoted[i])
		}
	}
	return regexp.Compile(sb.String())
}
// words matches each maximal run of Unicode letters (\pL); main uses it to
// pull the individual words out of the raw, delimiter-laden input.
var words = regexp.MustCompile(`\pL+`)
func main() {
input := "\tThis\v\x20\x20,\t\tis\t\t,?a!,¿delimited?,string‽"
allWords := words.FindAllString(input, -1)
re, err := build(allWords...)
if err != nil {
panic(err)
}
fmt.Println(re)
str := "This is a delimited string, so let's go"
//matches := re.FindAllString(str, -1)
groupNames := re.SubexpNames()
matches := re.FindAllStringSubmatch(str, -1)
if len(matches) == 0 {
fmt.Println("Sorry, no single match had been found")
} else if len(matches[0][1]) > 0 {
fmt.Println("An exact match had been found")
fmt.Println("groupNames[1]:", groupNames[1], len(matches[0][1]), matches[0][1])
} else {
fmt.Println("A non-perfect match had been found")
fmt.Println("We found", len(matches), "matches, that are:", matches)
}
}
And the output for str := "This is a delimited string, so let's go"
is:
(?i)(?P<fullMatch>This is a delimited string)|(delimited)|(string)
An exact match had been found
groupNames[1]: fullMatch 26 This is a delimited string
And for str := "This is not a delimited string, so let's go"
, the output is:
(?i)(?P<fullMatch>This is a delimited string)|(delimited)|(string)
A non-perfect match had been found
We found 2 matches, that are: [[delimited delimited ] [string string]]
I have two questions:
-
Why are the groups in the last line duplicated? That is, why do I get [delimited delimited ]
instead of [delimited], and [string string]
instead of [string]?
-
Are there any comments on my proposed approach to resolving this issue?