// Copyright 2015 go-swagger maintainers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package swag

import (
	"bytes"
	"sync"
	"unicode"
	"unicode/utf8"
)

type (
	// splitter breaks a name down into lexems, recognizing initialisms.
	//
	// The three initialism slices are indexed in parallel: the same index
	// is used to read an entry from each of them.
	splitter struct {
		// initialisms holds the initialisms as strings. An entry is reported
		// as the matched initialism by the post-split check.
		initialisms []string
		// initialismsRunes holds the rune patterns that names are matched against.
		initialismsRunes [][]rune
		// initialismsUpperCased holds the initialisms cached in their trimmed,
		// upper-cased version. It is only read by the post-split check.
		initialismsUpperCased [][]rune
		// postSplitInitialismCheck, when true, makes every segment produced by
		// the casual split be compared against the known initialisms.
		postSplitInitialismCheck bool
	}

	// splitterOption mutates a splitter in order to configure it.
	splitterOption func(*splitter)

	// initialismMatch tracks one initialism being matched against a name.
	initialismMatch struct {
		// body is the rune pattern of the initialism being matched.
		body []rune
		// start and end are rune positions in the name. end is inclusive and
		// is only set once the match is complete.
		start, end int
		// complete is set once every rune of body has been matched.
		complete bool
	}

	// initialismMatches is the collection of matches tracked while scanning a name.
	initialismMatches []initialismMatch
)

type (
	// Memory pools of temporary objects.
	//
	// These are used to recycle temporarily allocated objects
	// and relieve the GC from undue pressure.
	//
	// Each type embeds a *sync.Pool and adds typed Borrow/Redeem methods.

	// matchesPool recycles *initialismMatches.
	matchesPool struct {
		*sync.Pool
	}

	// buffersPool recycles *bytes.Buffer.
	buffersPool struct {
		*sync.Pool
	}

	// lexemsPool recycles *[]nameLexem.
	lexemsPool struct {
		*sync.Pool
	}

	// splittersPool recycles *splitter.
	splittersPool struct {
		*sync.Pool
	}
)

var (
	// poolOfMatches holds temporary slices for recycling during the initialism match process.
	//
	// New slices are empty, with an initial capacity of maxAllocMatches.
	poolOfMatches = matchesPool{
		Pool: &sync.Pool{
			New: func() any {
				s := make(initialismMatches, 0, maxAllocMatches)

				// a pointer is stored, to match the type assertion made when borrowing
				return &s
			},
		},
	}

	// poolOfBuffers holds byte buffers used to accumulate the current segment
	// while breaking down a string.
	poolOfBuffers = buffersPool{
		Pool: &sync.Pool{
			New: func() any {
				return new(bytes.Buffer)
			},
		},
	}

	// poolOfLexems holds the slices of lexems produced by a split.
	//
	// New slices are empty, with an initial capacity of maxAllocMatches.
	poolOfLexems = lexemsPool{
		Pool: &sync.Pool{
			New: func() any {
				s := make([]nameLexem, 0, maxAllocMatches)

				return &s
			},
		},
	}

	// poolOfSplitters holds splitters built by newSplitter without any option.
	poolOfSplitters = splittersPool{
		Pool: &sync.Pool{
			New: func() any {
				s := newSplitter()

				return &s
			},
		},
	}
)

// nameReplaceTable finds a word representation for special characters.
//
// It returns the replacement and true when r is one of the handled
// characters. '-' and '_' are handled, but replaced by an empty string.
// Any other rune yields ("", false).
func nameReplaceTable(r rune) (string, bool) {
	switch r {
	case '@':
		return "At ", true
	case '&':
		return "And ", true
	case '|':
		return "Pipe ", true
	case '$':
		return "Dollar ", true
	case '!':
		return "Bang ", true
	case '-':
		// recognized, but nothing is substituted
		return "", true
	case '_':
		// recognized, but nothing is substituted
		return "", true
	default:
		return "", false
	}
}

// split calls the splitter.
//
// It borrows a splitter with default options from the pool, splits str,
// and returns a freshly allocated slice holding the value of GetOriginal
// for every lexem found. The borrowed lexems and splitter are handed back
// to their pools before returning.
//
// Use newSplitter for more control and options.
func split(str string) []string {
	s := poolOfSplitters.BorrowSplitter()
	lexems := s.split(str)
	result := make([]string, 0, len(*lexems))

	for _, lexem := range *lexems {
		result = append(result, lexem.GetOriginal())
	}
	poolOfLexems.RedeemLexems(lexems)
	poolOfSplitters.RedeemSplitter(s)

	return result
}

// newSplitter builds a splitter from the package-level initialisms tables,
// then applies the options in order.
//
// The post-split initialism check is disabled unless an option enables it.
func newSplitter(options ...splitterOption) splitter {
	s := splitter{
		postSplitInitialismCheck: false,
		initialisms:              initialisms,
		initialismsRunes:         initialismsRunes,
		initialismsUpperCased:    initialismsUpperCased,
	}

	for _, option := range options {
		option(&s)
	}

	return s
}

// withPostSplitInitialismCheck is a splitterOption that allows catching
// initialisms after the main split process.
func withPostSplitInitialismCheck(s *splitter) {
	s.postSplitInitialismCheck = true
}

// BorrowMatches gets a slice of matches from the pool, truncated to length zero.
//
// Hand it back with RedeemMatches when done.
func (p matchesPool) BorrowMatches() *initialismMatches {
	s := p.Get().(*initialismMatches)
	*s = (*s)[:0] // reset slice, keep allocated capacity

	return s
}

// BorrowBuffer gets a reset buffer from the pool, grown if its capacity
// is lower than size.
//
// Hand it back with RedeemBuffer when done.
func (p buffersPool) BorrowBuffer(size int) *bytes.Buffer {
	s := p.Get().(*bytes.Buffer)
	s.Reset()

	if s.Cap() < size {
		s.Grow(size)
	}

	return s
}

// BorrowLexems gets a slice of lexems from the pool, truncated to length zero.
//
// Hand it back with RedeemLexems when done.
func (p lexemsPool) BorrowLexems() *[]nameLexem {
	s := p.Get().(*[]nameLexem)
	*s = (*s)[:0] // reset slice, keep allocated capacity

	return s
}

// BorrowSplitter gets a splitter from the pool, with the post-split check
// disabled, then applies the options in order.
//
// Only postSplitInitialismCheck is reset here: the initialisms tables held
// by a recycled splitter are left as they were.
//
// Hand it back with RedeemSplitter when done.
func (p splittersPool) BorrowSplitter(options ...splitterOption) *splitter {
	s := p.Get().(*splitter)
	s.postSplitInitialismCheck = false // reset options
	for _, apply := range options {
		apply(s)
	}

	return s
}

// RedeemMatches puts a slice of matches back into the pool.
func (p matchesPool) RedeemMatches(s *initialismMatches) {
	p.Put(s)
}

// RedeemBuffer puts a buffer back into the pool.
func (p buffersPool) RedeemBuffer(s *bytes.Buffer) {
	p.Put(s)
}

// RedeemLexems puts a slice of lexems back into the pool.
func (p lexemsPool) RedeemLexems(s *[]nameLexem) {
	p.Put(s)
}

// RedeemSplitter puts a splitter back into the pool.
func (p splittersPool) RedeemSplitter(s *splitter) {
	p.Put(s)
}

// isZero reports whether both start and end are zero.
//
// NOTE(review): body and complete are not looked at, so a complete match
// with start == 0 and end == 0 cannot be told apart from the zero value.
func (m initialismMatch) isZero() bool {
	return m.start == 0 && m.end == 0
}

// split breaks name down into lexems.
//
// The returned slice is borrowed from poolOfLexems: the caller is expected
// to redeem it. It is empty when name contains no rune.
func (s splitter) split(name string) *[]nameLexem {
	nameRunes := []rune(name)
	matches := s.gatherInitialismMatches(nameRunes)
	if matches == nil {
		// no rune was scanned
		return poolOfLexems.BorrowLexems()
	}

	return s.mapMatchesToNameLexems(nameRunes, matches)
}

// gatherInitialismMatches scans nameRunes once and collects the initialism
// matches found, complete or still ongoing at the end of the scan.
//
// For every rune, the matches carried over from the previous rune are
// re-examined, then a new match is started for each initialism beginning
// with that rune.
//
// It returns nil when nameRunes is empty. Otherwise, the result is borrowed
// from poolOfMatches and must be redeemed by the caller.
//
// NOTE(review): this looks like it assumes that every initialism has at least
// two runes. An empty one would fail on initialismRunes[0], and a one-rune
// one would index past body on the following rune. To be confirmed against
// the code that registers initialisms.
func (s splitter) gatherInitialismMatches(nameRunes []rune) *initialismMatches {
	var matches *initialismMatches

	for currentRunePosition, currentRune := range nameRunes {
		// recycle these allocations as we loop over runes
		// with such recycling, only 2 slices should be allocated per call
		// instead of O(n).
		newMatches := poolOfMatches.BorrowMatches()

		// check current initialism matches
		if matches != nil { // skip first iteration
			for _, match := range *matches {
				// complete matches are carried over unchanged
				if keepCompleteMatch := match.complete; keepCompleteMatch {
					*newMatches = append(*newMatches, match)
					continue
				}

				// drop failed match
				currentMatchRune := match.body[currentRunePosition-match.start]
				if currentMatchRune != currentRune {
					continue
				}

				// try to complete ongoing match
				if currentRunePosition-match.start == len(match.body)-1 {
					// we are close; the next step is to check the symbol ahead
					// if it is a small letter, then it is not the end of match
					// but beginning of the next word

					if currentRunePosition < len(nameRunes)-1 {
						nextRune := nameRunes[currentRunePosition+1]
						if newWord := unicode.IsLower(nextRune); newWord {
							// oh ok, it was the start of a new word
							continue
						}
					}

					// match is a copy of the slice element: the updated copy
					// is what gets appended below
					match.complete = true
					match.end = currentRunePosition
				}

				*newMatches = append(*newMatches, match)
			}
		}

		// check for new initialism matches
		for i := range s.initialisms {
			initialismRunes := s.initialismsRunes[i]
			if initialismRunes[0] == currentRune {
				*newMatches = append(*newMatches, initialismMatch{
					start:    currentRunePosition,
					body:     initialismRunes,
					complete: false,
				})
			}
		}

		// the previous slice is no longer needed: recycle it
		if matches != nil {
			poolOfMatches.RedeemMatches(matches)
		}
		matches = newMatches
	}

	// up to the caller to redeem this last slice
	return matches
}

// mapMatchesToNameLexems turns the matches gathered over nameRunes into lexems.
//
// Incomplete matches are ignored, as well as matches which start before the
// end of the last accepted one. Every accepted match yields an initialism
// lexem; the runes before, between and after accepted matches are broken
// down as casual strings.
//
// matches is redeemed to poolOfMatches before returning. The returned slice
// is borrowed from poolOfLexems.
func (s splitter) mapMatchesToNameLexems(nameRunes []rune, matches *initialismMatches) *[]nameLexem {
	nameLexems := poolOfLexems.BorrowLexems()

	// the zero value stands for "no match accepted yet" (see isZero)
	var lastAcceptedMatch initialismMatch
	for _, match := range *matches {
		if !match.complete {
			continue
		}

		if firstMatch := lastAcceptedMatch.isZero(); firstMatch {
			// everything before the first accepted match is casual
			s.appendBrokenDownCasualString(nameLexems, nameRunes[:match.start])
			*nameLexems = append(*nameLexems, s.breakInitialism(string(match.body)))

			lastAcceptedMatch = match

			continue
		}

		if overlappedMatch := match.start <= lastAcceptedMatch.end; overlappedMatch {
			continue
		}

		// end is inclusive: the casual part starts right after it
		middle := nameRunes[lastAcceptedMatch.end+1 : match.start]
		s.appendBrokenDownCasualString(nameLexems, middle)
		*nameLexems = append(*nameLexems, s.breakInitialism(string(match.body)))

		lastAcceptedMatch = match
	}

	// we have not found any accepted matches
	if lastAcceptedMatch.isZero() {
		*nameLexems = (*nameLexems)[:0]
		s.appendBrokenDownCasualString(nameLexems, nameRunes)
	} else if lastAcceptedMatch.end+1 != len(nameRunes) {
		// runes remain after the last accepted match
		rest := nameRunes[lastAcceptedMatch.end+1:]
		s.appendBrokenDownCasualString(nameLexems, rest)
	}

	poolOfMatches.RedeemMatches(matches)

	return nameLexems
}

// breakInitialism builds an initialism lexem, with original passed both as
// the original text and as the matched initialism.
func (s splitter) breakInitialism(original string) nameLexem {
	return newInitialismNameLexem(original, original)
}

// appendBrokenDownCasualString splits str into segments and appends one
// lexem per segment to segments.
//
// A segment ends:
//   - on a rune known to nameReplaceTable: a non-empty replacement is added
//     as a lexem of its own,
//   - on a rune which is neither a letter, a mark, a number nor a connector
//     punctuation: that rune is dropped,
//   - before an upper-case rune, which starts the next segment.
//
// When the post-split initialism check is enabled, a segment equal to a known
// initialism (ignoring case and surrounding blanks) yields an initialism
// lexem. In every other case, a casual lexem is added.
func (s splitter) appendBrokenDownCasualString(segments *[]nameLexem, str []rune) {
	currentSegment := poolOfBuffers.BorrowBuffer(len(str)) // unlike strings.Builder, bytes.Buffer initial storage can be reused
	defer func() {
		poolOfBuffers.RedeemBuffer(currentSegment)
	}()

	addCasualNameLexem := func(original string) {
		*segments = append(*segments, newCasualNameLexem(original))
	}

	addInitialismNameLexem := func(original, match string) {
		*segments = append(*segments, newInitialismNameLexem(original, match))
	}

	// addNameLexem is picked once, depending on the splitter's option
	var addNameLexem func(string)
	if s.postSplitInitialismCheck {
		addNameLexem = func(original string) {
			for i := range s.initialisms {
				if isEqualFoldIgnoreSpace(s.initialismsUpperCased[i], original) {
					addInitialismNameLexem(original, s.initialisms[i])

					return
				}
			}

			addCasualNameLexem(original)
		}
	} else {
		addNameLexem = addCasualNameLexem
	}

	for _, rn := range str {
		if replace, found := nameReplaceTable(rn); found {
			// flush the ongoing segment before the replacement
			if currentSegment.Len() > 0 {
				addNameLexem(currentSegment.String())
				currentSegment.Reset()
			}

			if replace != "" {
				addNameLexem(replace)
			}

			continue
		}

		if !unicode.In(rn, unicode.L, unicode.M, unicode.N, unicode.Pc) {
			// separator: flush the ongoing segment and drop the rune
			if currentSegment.Len() > 0 {
				addNameLexem(currentSegment.String())
				currentSegment.Reset()
			}

			continue
		}

		if unicode.IsUpper(rn) {
			// an upper-case rune starts a new segment
			if currentSegment.Len() > 0 {
				addNameLexem(currentSegment.String())
			}
			currentSegment.Reset()
		}

		currentSegment.WriteRune(rn)
	}

	// flush the last segment
	if currentSegment.Len() > 0 {
		addNameLexem(currentSegment.String())
	}
}

// isEqualFoldIgnoreSpace is the same as strings.EqualFold, but
// it ignores leading and trailing blank spaces in the compared
// string.
//
// base is assumed to be composed of upper-cased runes, and be already
// trimmed.
//
// This code is heavily inspired from strings.EqualFold.
//
// str is scanned in place: bytes are read by indexing the string and
// multi-byte runes are decoded with utf8.DecodeRuneInString, so that neither
// a copy nor an unsafe conversion of str is needed.
//
// It returns true when str, once its surrounding blanks are skipped, matches
// base rune for rune: ASCII letters are folded to upper-case, other runes are
// compared after unicode.ToUpper. ASCII blanks are limited to space and tab;
// non-ASCII blanks are the runes reported by unicode.IsSpace. When str only
// contains blanks (or is empty), the result is true only if base is empty.
func isEqualFoldIgnoreSpace(base []rune, str string) bool {
	var i int

	// skip leading blanks
	for i < len(str) {
		if c := str[i]; c < utf8.RuneSelf {
			// fast path for ASCII
			if c != ' ' && c != '\t' {
				break
			}
			i++

			continue
		}

		// unicode case
		r, size := utf8.DecodeRuneInString(str[i:])
		if !unicode.IsSpace(r) {
			break
		}
		i += size
	}

	if i >= len(str) {
		// nothing but blanks: only an empty base matches
		return len(base) == 0
	}

	// compare str against base, one rune at a time
	matched := 0
	for _, baseRune := range base {
		if i >= len(str) {
			// str is exhausted before base: caught by the count check below
			break
		}

		if c := str[i]; c < utf8.RuneSelf {
			// single byte rune case (ASCII)
			if baseRune >= utf8.RuneSelf {
				return false
			}

			// base is upper-cased: accept c as is, or as the lower-case
			// version of the expected letter
			baseChar := byte(baseRune)
			if c != baseChar &&
				!('a' <= c && c <= 'z' && c-'a'+'A' == baseChar) {
				return false
			}

			matched++
			i++

			continue
		}

		// unicode case
		r, size := utf8.DecodeRuneInString(str[i:])
		if unicode.ToUpper(r) != baseRune {
			return false
		}
		matched++
		i += size
	}

	if matched != len(base) {
		return false
	}

	// all passed: now we should only have blanks
	for i < len(str) {
		if c := str[i]; c < utf8.RuneSelf {
			// fast path for ASCII
			if c != ' ' && c != '\t' {
				return false
			}
			i++

			continue
		}

		// unicode case
		r, size := utf8.DecodeRuneInString(str[i:])
		if !unicode.IsSpace(r) {
			return false
		}
		i += size
	}

	return true
}