From 446d5551787646c03b27fabdeb504dde36f97af6 Mon Sep 17 00:00:00 2001 From: Matt Butcher <mbutcher@engineyard.com> Date: Fri, 23 Sep 2016 21:42:07 -0600 Subject: [PATCH] feat(helm): update helm search Switch 'helm search' from file crawling to using the indices. Also add scorable indexing, forward porting the search code I originally wrote for Helm Classic. Closes #1226 Partially addresses #1199 --- cmd/helm/helm.go | 1 + cmd/helm/search.go | 125 +++++----- cmd/helm/search/search.go | 183 ++++++++++++++ cmd/helm/search/search_test.go | 232 ++++++++++++++++++ cmd/helm/search_test.go | 121 +++++---- .../repository/cache/testing-index.yaml | 54 ++++ .../helmhome/repository/local/index.yaml | 0 .../helmhome/repository/repositories.yaml | 1 + 8 files changed, 593 insertions(+), 124 deletions(-) create mode 100644 cmd/helm/search/search.go create mode 100644 cmd/helm/search/search_test.go create mode 100644 cmd/helm/testdata/helmhome/repository/cache/testing-index.yaml create mode 100644 cmd/helm/testdata/helmhome/repository/local/index.yaml create mode 100644 cmd/helm/testdata/helmhome/repository/repositories.yaml diff --git a/cmd/helm/helm.go b/cmd/helm/helm.go index 11e0f88d6..4c076b068 100644 --- a/cmd/helm/helm.go +++ b/cmd/helm/helm.go @@ -101,6 +101,7 @@ func newRootCmd(out io.Writer) *cobra.Command { newVersionCmd(nil, out), newRepoCmd(out), newDependencyCmd(out), + newSearchCmd(out), ) return cmd } diff --git a/cmd/helm/search.go b/cmd/helm/search.go index e1a465c99..625b980bd 100644 --- a/cmd/helm/search.go +++ b/cmd/helm/search.go @@ -17,90 +17,99 @@ limitations under the License. package main import ( - "errors" "fmt" - "os" - "path/filepath" + "io" "strings" "github.com/spf13/cobra" + "k8s.io/helm/cmd/helm/helmpath" + "k8s.io/helm/cmd/helm/search" "k8s.io/helm/pkg/repo" ) -func init() { - RootCommand.AddCommand(searchCmd) +const searchDesc = ` +Search reads through all of the repositories configured on the system, and +looks for matches. + +Repositories are managed with 'helm repo' commands. +` + +// searchMaxScore suggests that any score higher than this is not considered a match. +const searchMaxScore = 25 + +type searchCmd struct { + out io.Writer + helmhome helmpath.Home + + regexp bool } -var searchCmd = &cobra.Command{ - Use: "search [keyword]", - Short: "search for a keyword in charts", - Long: "Searches the known repositories cache files for the specified search string, looks at name and keywords", - RunE: search, - PreRunE: requireInit, +func newSearchCmd(out io.Writer) *cobra.Command { + sc := &searchCmd{out: out, helmhome: helmpath.Home(homePath())} + + cmd := &cobra.Command{ + Use: "search [keyword]", + Short: "search for a keyword in charts", + Long: searchDesc, + RunE: func(cmd *cobra.Command, args []string) error { + return sc.run(args) + }, + PreRunE: requireInit, + } + + cmd.Flags().BoolVarP(&sc.regexp, "regexp", "r", false, "use regular expressions for searching") + + return cmd } -func search(cmd *cobra.Command, args []string) error { +func (s *searchCmd) run(args []string) error { + index, err := s.buildIndex() + if err != nil { + return err + } + if len(args) == 0 { - return errors.New("This command needs at least one argument (search string)") + s.showAllCharts(index) } - // TODO: This needs to be refactored to use loadChartRepositories - results, err := searchCacheForPattern(cacheDirectory(), args[0]) + q := strings.Join(args, " ") + res, err := index.Search(q, searchMaxScore, s.regexp) if err != nil { - return err + return nil } - if len(results) > 0 { - for _, result := range results { - fmt.Println(result) - } + search.SortScore(res) + + for _, r := range res { + fmt.Fprintln(s.out, r.Name) } + return nil } -func searchChartRefsForPattern(search string, chartRefs map[string]*repo.ChartRef) []string { - matches := []string{} - for k, c := range chartRefs { - if strings.Contains(c.Name, search) && !c.Removed { - matches = append(matches, k) - continue - } - if c.Chartfile == nil { - continue - } - for _, keyword := range c.Chartfile.Keywords { - if strings.Contains(keyword, search) { - matches = append(matches, k) - } - } +func (s *searchCmd) showAllCharts(i *search.Index) { + for name := range i.Entries() { + fmt.Fprintln(s.out, name) } - return matches } -func searchCacheForPattern(dir string, search string) ([]string, error) { - fileList := []string{} - filepath.Walk(dir, func(path string, f os.FileInfo, err error) error { - if !f.IsDir() { - fileList = append(fileList, path) - } - return nil - }) - matches := []string{} - for _, f := range fileList { - index, err := repo.LoadIndexFile(f) +func (s *searchCmd) buildIndex() (*search.Index, error) { + // Load the repositories.yaml + rf, err := repo.LoadRepositoriesFile(s.helmhome.RepositoryFile()) + if err != nil { + return nil, err + } + + i := search.NewIndex() + for n := range rf.Repositories { + f := s.helmhome.CacheIndex(n) + ind, err := repo.LoadIndexFile(f) if err != nil { - return matches, fmt.Errorf("index %s corrupted: %s", f, err) + fmt.Fprintf(s.out, "WARNING: Repo %q is corrupt. Try 'helm update': %s", f, err) + continue } - m := searchChartRefsForPattern(search, index.Entries) - repoName := strings.TrimSuffix(filepath.Base(f), "-index.yaml") - for _, c := range m { - // TODO: Is it possible for this file to be missing? Or to have - // an extension other than .tgz? Should the actual filename be in - // the YAML? - fname := filepath.Join(repoName, c+".tgz") - matches = append(matches, fname) - } + i.AddRepo(n, ind) } - return matches, nil + return i, nil } diff --git a/cmd/helm/search/search.go b/cmd/helm/search/search.go new file mode 100644 index 000000000..0dfb72add --- /dev/null +++ b/cmd/helm/search/search.go @@ -0,0 +1,183 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/*Package search provides client-side repository searching. + +This supports building an in-memory search index based on the contents of +multiple repositories, and then using string matching or regular expressions +to find matches. +*/ +package search + +import ( + "errors" + "path/filepath" + "regexp" + "sort" + "strings" + + "k8s.io/helm/pkg/repo" +) + +// Result is a search result. +// +// Score indicates how close it is to match. The higher the score, the longer +// the distance. +type Result struct { + Name string + Score int +} + +// Index is a searchable index of chart information. +type Index struct { + lines map[string]string + charts map[string]*repo.ChartRef +} + +const sep = "\v" + +// NewIndex creats a new Index. +func NewIndex() *Index { + return &Index{lines: map[string]string{}, charts: map[string]*repo.ChartRef{}} +} + +// AddRepo adds a repository index to the search index. +func (i *Index) AddRepo(rname string, ind *repo.IndexFile) { + for name, ref := range ind.Entries { + fname := filepath.Join(rname, name) + i.lines[fname] = indstr(rname, ref) + i.charts[fname] = ref + } +} + +// Entries returns the entries in an index. +func (i *Index) Entries() map[string]*repo.ChartRef { + return i.charts +} + +// Search searches an index for the given term. +// +// Threshold indicates the maximum score a term may have before being marked +// irrelevant. (Low score means higher relevance. Golf, not bowling.) +// +// If regexp is true, the term is treated as a regular expression. Otherwise, +// term is treated as a literal string. +func (i *Index) Search(term string, threshold int, regexp bool) ([]*Result, error) { + if regexp == true { + return i.SearchRegexp(term, threshold) + } + return i.SearchLiteral(term, threshold), nil +} + +// calcScore calculates a score for a match. +func (i *Index) calcScore(index int, matchline string) int { + + // This is currently tied to the fact that sep is a single char. + splits := []int{} + s := rune(sep[0]) + for i, ch := range matchline { + if ch == s { + splits = append(splits, i) + } + } + + for i, pos := range splits { + if index > pos { + continue + } + return i + } + return len(splits) +} + +// SearchLiteral does a literal string search (no regexp). +func (i *Index) SearchLiteral(term string, threshold int) []*Result { + term = strings.ToLower(term) + buf := []*Result{} + for k, v := range i.lines { + res := strings.Index(v, term) + if score := i.calcScore(res, v); res != -1 && score < threshold { + buf = append(buf, &Result{Name: k, Score: score}) + } + } + return buf +} + +// SearchRegexp searches using a regular expression. +func (i *Index) SearchRegexp(re string, threshold int) ([]*Result, error) { + matcher, err := regexp.Compile(re) + if err != nil { + return []*Result{}, err + } + buf := []*Result{} + for k, v := range i.lines { + ind := matcher.FindStringIndex(v) + if len(ind) == 0 { + continue + } + if score := i.calcScore(ind[0], v); ind[0] >= 0 && score < threshold { + buf = append(buf, &Result{Name: k, Score: score}) + } + } + return buf, nil +} + +// Chart returns the ChartRef for a particular name. +func (i *Index) Chart(name string) (*repo.ChartRef, error) { + c, ok := i.charts[name] + if !ok { + return nil, errors.New("no such chart") + } + return c, nil +} + +// SortScore does an in-place sort of the results. +// +// Lowest scores are highest on the list. Matching scores are subsorted alphabetically. +func SortScore(r []*Result) { + sort.Sort(scoreSorter(r)) +} + +// scoreSorter sorts results by score, and subsorts by alpha Name. +type scoreSorter []*Result + +// Len returns the length of this scoreSorter. +func (s scoreSorter) Len() int { return len(s) } + +// Swap performs an in-place swap. +func (s scoreSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +// Less compares a to b, and returns true if a is less than b. +func (s scoreSorter) Less(a, b int) bool { + first := s[a] + second := s[b] + + if first.Score > second.Score { + return false + } + if first.Score < second.Score { + return true + } + return first.Name < second.Name +} + +func indstr(name string, ref *repo.ChartRef) string { + i := ref.Name + sep + name + "/" + ref.Name + sep + if ref.Chartfile != nil { + i += ref.Chartfile.Description + sep + strings.Join(ref.Chartfile.Keywords, sep) + } + return strings.ToLower(i) +} diff --git a/cmd/helm/search/search_test.go b/cmd/helm/search/search_test.go new file mode 100644 index 000000000..e20b8e756 --- /dev/null +++ b/cmd/helm/search/search_test.go @@ -0,0 +1,232 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package search + +import ( + "strings" + "testing" + + "k8s.io/helm/pkg/proto/hapi/chart" + "k8s.io/helm/pkg/repo" +) + +func TestSortScore(t *testing.T) { + in := []*Result{ + {Name: "bbb", Score: 0}, + {Name: "aaa", Score: 5}, + {Name: "abb", Score: 5}, + {Name: "aab", Score: 0}, + {Name: "bab", Score: 5}, + } + expect := []string{"aab", "bbb", "aaa", "abb", "bab"} + expectScore := []int{0, 0, 5, 5, 5} + SortScore(in) + + // Test Score + for i := 0; i < len(expectScore); i++ { + if expectScore[i] != in[i].Score { + t.Errorf("Sort error on index %d: expected %d, got %d", i, expectScore[i], in[i].Score) + } + } + // Test Name + for i := 0; i < len(expect); i++ { + if expect[i] != in[i].Name { + t.Errorf("Sort error: expected %s, got %s", expect[i], in[i].Name) + } + } +} + +var testCacheDir = "../testdata/" + +var indexfileEntries = map[string]*repo.ChartRef{ + "niГ±a-0.1.0": { + Name: "niГ±a", + URL: "http://example.com/charts/nina-0.1.0.tgz", + Chartfile: &chart.Metadata{ + Name: "niГ±a", + Version: "0.1.0", + Description: "One boat", + }, + }, + "pinta-0.1.0": { + Name: "pinta", + URL: "http://example.com/charts/pinta-0.1.0.tgz", + Chartfile: &chart.Metadata{ + Name: "pinta", + Version: "0.1.0", + Description: "Two ship", + }, + }, + "santa-maria-1.2.3": { + Name: "santa-maria", + URL: "http://example.com/charts/santa-maria-1.2.3.tgz", + Chartfile: &chart.Metadata{ + Name: "santa-maria", + Version: "1.2.3", + Description: "Three boat", + }, + }, +} + +func loadTestIndex(t *testing.T) *Index { + i := NewIndex() + i.AddRepo("testing", &repo.IndexFile{Entries: indexfileEntries}) + i.AddRepo("ztesting", &repo.IndexFile{Entries: map[string]*repo.ChartRef{ + "pinta-2.0.0": { + Name: "pinta", + URL: "http://example.com/charts/pinta-2.0.0.tgz", + Chartfile: &chart.Metadata{ + Name: "pinta", + Version: "2.0.0", + Description: "Two ship, version two", + }, + }, + }}) + return i +} + +func TestSearchByName(t *testing.T) { + + tests := []struct { + name string + query string + expect []*Result + regexp bool + fail bool + failMsg string + }{ + { + name: "basic search for one result", + query: "santa-maria", + expect: []*Result{ + {Name: "testing/santa-maria-1.2.3"}, + }, + }, + { + name: "basic search for two results", + query: "pinta", + expect: []*Result{ + {Name: "testing/pinta-0.1.0"}, + {Name: "ztesting/pinta-2.0.0"}, + }, + }, + { + name: "repo-specific search for one result", + query: "ztesting/pinta", + expect: []*Result{ + {Name: "ztesting/pinta-2.0.0"}, + }, + }, + { + name: "partial name search", + query: "santa", + expect: []*Result{ + {Name: "testing/santa-maria-1.2.3"}, + }, + }, + { + name: "description search, one result", + query: "Three", + expect: []*Result{ + {Name: "testing/santa-maria-1.2.3"}, + }, + }, + { + name: "description search, two results", + query: "two", + expect: []*Result{ + {Name: "testing/pinta-0.1.0"}, + {Name: "ztesting/pinta-2.0.0"}, + }, + }, + { + name: "nothing found", + query: "mayflower", + expect: []*Result{}, + }, + { + name: "regexp, one result", + query: "th[ref]*", + expect: []*Result{ + {Name: "testing/santa-maria-1.2.3"}, + }, + regexp: true, + }, + { + name: "regexp, fail compile", + query: "th[", + expect: []*Result{}, + regexp: true, + fail: true, + failMsg: "error parsing regexp:", + }, + } + + i := loadTestIndex(t) + + for _, tt := range tests { + + charts, err := i.Search(tt.query, 100, tt.regexp) + if err != nil { + if tt.fail { + if !strings.Contains(err.Error(), tt.failMsg) { + t.Fatalf("%s: Unexpected error message: %s", tt.name, err) + } + continue + } + t.Fatalf("%s: %s", tt.name, err) + } + // Give us predictably ordered results. + SortScore(charts) + + l := len(charts) + if l != len(tt.expect) { + t.Fatalf("%s: Expected %d result, got %d", tt.name, len(tt.expect), l) + } + // For empty result sets, just keep going. + if l == 0 { + continue + } + + for i, got := range charts { + ex := tt.expect[i] + if got.Name != ex.Name { + t.Errorf("%s[%d]: Expected name %q, got %q", tt.name, i, ex.Name, got.Name) + } + } + + } +} + +func TestCalcScore(t *testing.T) { + i := NewIndex() + + fields := []string{"aaa", "bbb", "ccc", "ddd"} + matchline := strings.Join(fields, sep) + if r := i.calcScore(2, matchline); r != 0 { + t.Errorf("Expected 0, got %d", r) + } + if r := i.calcScore(5, matchline); r != 1 { + t.Errorf("Expected 1, got %d", r) + } + if r := i.calcScore(10, matchline); r != 2 { + t.Errorf("Expected 2, got %d", r) + } + if r := i.calcScore(14, matchline); r != 3 { + t.Errorf("Expected 3, got %d", r) + } +} diff --git a/cmd/helm/search_test.go b/cmd/helm/search_test.go index 0869551aa..ffd4493fe 100644 --- a/cmd/helm/search_test.go +++ b/cmd/helm/search_test.go @@ -17,79 +17,68 @@ limitations under the License. package main import ( + "bytes" + "strings" "testing" - - "k8s.io/helm/pkg/repo" ) -const testDir = "testdata/testcache" -const testFile = "testdata/testcache/local-index.yaml" - -type searchTestCase struct { - in string - expectedOut []string -} - -var searchTestCases = []searchTestCase{ - {"foo", []string{}}, - {"alpine", []string{"alpine-1.0.0"}}, - {"sumtin", []string{"alpine-1.0.0"}}, - {"web", []string{"nginx-0.1.0"}}, -} +func TestSearchCmd(t *testing.T) { + tests := []struct { + name string + args []string + flags []string + expect string + regexp bool + fail bool + }{ + { + name: "search for 'maria', expect one match", + args: []string{"maria"}, + expect: "testing/mariadb-0.3.0", + }, + { + name: "search for 'alpine', expect two matches", + args: []string{"alpine"}, + expect: "testing/alpine-0.1.0\ntesting/alpine-0.2.0", + }, + { + name: "search for 'syzygy', expect no matches", + args: []string{"syzygy"}, + expect: "", + }, + { + name: "search for 'alp[a-z]+', expect two matches", + args: []string{"alp[a-z]+"}, + flags: []string{"--regexp"}, + expect: "testing/alpine-0.1.0\ntesting/alpine-0.2.0", + regexp: true, + }, + { + name: "search for 'alp[', expect failure to compile regexp", + args: []string{"alp["}, + flags: []string{"--regexp"}, + regexp: true, + fail: true, + }, + } -var searchCacheTestCases = []searchTestCase{ - {"notthere", []string{}}, - {"odd", []string{"foobar/oddness-1.2.3.tgz"}}, - {"sumtin", []string{"local/alpine-1.0.0.tgz", "foobar/oddness-1.2.3.tgz"}}, - {"foobar", []string{"foobar/foobar-0.1.0.tgz"}}, - {"web", []string{"local/nginx-0.1.0.tgz"}}, -} + oldhome := helmHome + helmHome = "testdata/helmhome" + defer func() { helmHome = oldhome }() -func validateEntries(t *testing.T, in string, found []string, expected []string) { - if len(found) != len(expected) { - t.Errorf("Failed to search %s: Expected: %#v got: %#v", in, expected, found) - } - foundCount := 0 - for _, exp := range expected { - for _, f := range found { - if exp == f { - foundCount = foundCount + 1 + for _, tt := range tests { + buf := bytes.NewBuffer(nil) + cmd := newSearchCmd(buf) + cmd.ParseFlags(tt.flags) + if err := cmd.RunE(cmd, tt.args); err != nil { + if tt.fail { continue } + t.Fatalf("%s: unexpected error %s", tt.name, err) + } + got := strings.TrimSpace(buf.String()) + if got != tt.expect { + t.Errorf("%s: expected %q, got %q", tt.name, tt.expect, got) } - } - if foundCount != len(expected) { - t.Errorf("Failed to find expected items for %s: Expected: %#v got: %#v", in, expected, found) - } - -} - -func searchTestRunner(t *testing.T, tc searchTestCase) { - cf, err := repo.LoadIndexFile(testFile) - if err != nil { - t.Errorf("Failed to load index file : %s : %s", testFile, err) - } - - u := searchChartRefsForPattern(tc.in, cf.Entries) - validateEntries(t, tc.in, u, tc.expectedOut) -} - -func searchCacheTestRunner(t *testing.T, tc searchTestCase) { - u, err := searchCacheForPattern(testDir, tc.in) - if err != nil { - t.Errorf("searchCacheForPattern failed: %#v", err) - } - validateEntries(t, tc.in, u, tc.expectedOut) -} - -func TestSearches(t *testing.T) { - for _, tc := range searchTestCases { - searchTestRunner(t, tc) - } -} - -func TestCacheSearches(t *testing.T) { - for _, tc := range searchCacheTestCases { - searchCacheTestRunner(t, tc) } } diff --git a/cmd/helm/testdata/helmhome/repository/cache/testing-index.yaml b/cmd/helm/testdata/helmhome/repository/cache/testing-index.yaml new file mode 100644 index 000000000..67595882a --- /dev/null +++ b/cmd/helm/testdata/helmhome/repository/cache/testing-index.yaml @@ -0,0 +1,54 @@ +alpine-0.1.0: + name: alpine + url: http://storage.googleapis.com/kubernetes-charts/alpine-0.1.0.tgz + created: 2016-09-06 21:58:44.211261566 +0000 UTC + checksum: 0e6661f193211d7a5206918d42f5c2a9470b737d + chartfile: + name: alpine + home: https://k8s.io/helm + sources: + - https://github.com/kubernetes/helm + version: 0.1.0 + description: Deploy a basic Alpine Linux pod + keywords: [] + maintainers: [] + engine: "" + icon: "" +alpine-0.2.0: + name: alpine + url: http://storage.googleapis.com/kubernetes-charts/alpine-0.2.0.tgz + created: 2016-09-06 21:58:44.211261566 +0000 UTC + checksum: 0e6661f193211d7a5206918d42f5c2a9470b737d + chartfile: + name: alpine + home: https://k8s.io/helm + sources: + - https://github.com/kubernetes/helm + version: 0.2.0 + description: Deploy a basic Alpine Linux pod + keywords: [] + maintainers: [] + engine: "" + icon: "" +mariadb-0.3.0: + name: mariadb + url: http://storage.googleapis.com/kubernetes-charts/mariadb-0.3.0.tgz + created: 2016-09-06 21:58:44.211870222 +0000 UTC + checksum: 65229f6de44a2be9f215d11dbff311673fc8ba56 + chartfile: + name: mariadb + home: https://mariadb.org + sources: + - https://github.com/bitnami/bitnami-docker-mariadb + version: 0.3.0 + description: Chart for MariaDB + keywords: + - mariadb + - mysql + - database + - sql + maintainers: + - name: Bitnami + email: containers@bitnami.com + engine: gotpl + icon: "" diff --git a/cmd/helm/testdata/helmhome/repository/local/index.yaml b/cmd/helm/testdata/helmhome/repository/local/index.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/helm/testdata/helmhome/repository/repositories.yaml b/cmd/helm/testdata/helmhome/repository/repositories.yaml new file mode 100644 index 000000000..cd16e634a --- /dev/null +++ b/cmd/helm/testdata/helmhome/repository/repositories.yaml @@ -0,0 +1 @@ +testing: http://example.com/charts -- GitLab