From 24f4854dbce99bdfe74a1d3cc84adba99d9925d4 Mon Sep 17 00:00:00 2001 From: ZheNing Hu Date: Sat, 27 May 2023 22:15:38 +0800 Subject: [PATCH 1/2] wip(search): add /index and /search api Signed-off-by: ZheNing Hu --- go.mod | 2 + go.sum | 4 + pkg/router/router.go | 5 +- pkg/service/search/elastic_search.go | 123 +++++++++++++++++++++++++++ 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 pkg/service/search/elastic_search.go diff --git a/go.mod b/go.mod index 4de6f7a..2dbbbaa 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/adlternative/tinygithub go 1.20 require ( + github.com/elastic/go-elasticsearch/v8 v8.4.0-alpha.1.0.20230525132218-317dd3ac737f github.com/gin-contrib/sessions v0.0.5 github.com/gin-gonic/gin v1.9.0 github.com/git-lfs/pktline v0.0.0-20230103162542-ca444d533ef1 @@ -21,6 +22,7 @@ require ( github.com/bytedance/sonic v1.8.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-playground/locales v0.14.1 // indirect diff --git a/go.sum b/go.sum index e8658e3..b4b26d5 100644 --- a/go.sum +++ b/go.sum @@ -57,6 +57,10 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6 h1:1+44gxLdKRnR/Bx/iAtr+XqNcE4e0oODa63+FABNANI= +github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6/go.mod h1:87Tcz8IVNe6rVSLdBux1o/PEItLtyabHU3naC7IoqKI= +github.com/elastic/go-elasticsearch/v8 v8.4.0-alpha.1.0.20230525132218-317dd3ac737f h1:cBQtEqdUPkCYUU1RNqZ5G8QCIMBlFjuwnjgfoTaAcwo= +github.com/elastic/go-elasticsearch/v8 v8.4.0-alpha.1.0.20230525132218-317dd3ac737f/go.mod h1:NGmpvohKiRHXI0Sw4fuUGn6hYOmAXlyCphKpzVBiqDE= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= diff --git a/pkg/router/router.go b/pkg/router/router.go index 847f0b6..a8ef170 100644 --- a/pkg/router/router.go +++ b/pkg/router/router.go @@ -10,6 +10,7 @@ import ( "github.com/adlternative/tinygithub/pkg/service/home" "github.com/adlternative/tinygithub/pkg/service/pack" "github.com/adlternative/tinygithub/pkg/service/repo" + "github.com/adlternative/tinygithub/pkg/service/search" "github.com/adlternative/tinygithub/pkg/service/tags" "github.com/adlternative/tinygithub/pkg/service/tree" "github.com/adlternative/tinygithub/pkg/service/user" @@ -203,9 +204,11 @@ func Run(store *storage.Storage, dbEngine *model.DBEngine) error { } } } - + v2Group.POST("/index", search.Index(dbEngine)) + v2Group.GET("/search", search.Query(dbEngine)) } } + } err := r.SetTrustedProxies([]string{"127.0.0.1"}) if err != nil { diff --git a/pkg/service/search/elastic_search.go b/pkg/service/search/elastic_search.go new file mode 100644 index 0000000..23f98c0 --- /dev/null +++ b/pkg/service/search/elastic_search.go @@ -0,0 +1,123 @@ +package search + +import ( + "encoding/base64" + "fmt" + "net/http" + + "github.com/adlternative/tinygithub/pkg/model" + "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/elastic/go-elasticsearch/v8/typedapi/types/enums/refresh" + "github.com/gin-gonic/gin" +) + +const gitBlobInfoIndexName = "gitblob" + +type gitBlobInfo struct { + RepoName string `json:"repoName"` + Revision string `json:"revision"` + FilePath string `json:"filePath"` + BlobID string `json:"blobID"` + Language string `json:"language"` + + Contents []byte `json:"contents"` +} + +func (g *gitBlobInfo) ID() string { + return fmt.Sprintf("%s:%s", g.RepoName, g.BlobID) +} + +func Index(db *model.DBEngine) gin.HandlerFunc { + return func(c *gin.Context) { + var gitBlobInfo gitBlobInfo + if err := c.BindJSON(&gitBlobInfo); err != nil { + c.JSON(http.StatusBadRequest, gin.H{ + "error": err.Error(), + }) + return + } + contents := make([]byte, base64.StdEncoding.DecodedLen(len(gitBlobInfo.Contents))) + _, err := base64.RawStdEncoding.Decode(contents, gitBlobInfo.Contents) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{ + "error": err.Error(), + }) + return + } + gitBlobInfo.Contents = contents + + es, err := elasticsearch.NewTypedClient(elasticsearch.Config{}) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err.Error(), + }) + return + } + + response, err := es.Index(gitBlobInfoIndexName). + Request(&gitBlobInfo). + Id(gitBlobInfo.ID()). + Refresh(refresh.Waitfor). + Do(c) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err.Error(), + }) + return + } + c.JSON(http.StatusOK, response) + return + } +} + +func Query(db *model.DBEngine) gin.HandlerFunc { + return func(c *gin.Context) { + repoName := c.Query("repoName") + revision := c.Query("revision") + filePath := c.Query("filePath") + blobID := c.Query("blobID") + language := c.Query("language") + queryString := c.Query("query") + + es, err := elasticsearch.NewTypedClient(elasticsearch.Config{}) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err, + }) + return + } + boolQuery := types.BoolQuery{} + + queryAppend(boolQuery.Filter, "repoName", repoName) + queryAppend(boolQuery.Filter, "filePath", filePath) + queryAppend(boolQuery.Filter, "revision", revision) + queryAppend(boolQuery.Must, "blobID", blobID) + queryAppend(boolQuery.Must, "language", language) + queryAppend(boolQuery.Must, "contents", queryString) + + res, err := es.Search().Index(gitBlobInfoIndexName).TrackTotalHits("true"). + Request(&search.Request{ + Query: &types.Query{ + Bool: &boolQuery, + }, + }).Do(c) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err, + }) + return + } + c.JSON(http.StatusOK, res) + } +} + +func queryAppend(query []types.Query, prop, value string) []types.Query { + if value != "" { + query = append(query, types.Query{ + Term: map[string]types.TermQuery{prop: {Value: value}}, + }) + } + return query +} From 1a86ab583432fc14d3c30eb567f488517095b6c4 Mon Sep 17 00:00:00 2001 From: ZheNing Hu Date: Wed, 31 May 2023 20:47:12 +0800 Subject: [PATCH 2/2] WIP(search): add search CRUD api Signed-off-by: ZheNing Hu --- pkg/router/router.go | 11 +- pkg/service/search/elastic_search.go | 260 +++++++++++++++++++--- pkg/service/search/elastic_search_test.go | 129 +++++++++++ 3 files changed, 363 insertions(+), 37 deletions(-) create mode 100644 pkg/service/search/elastic_search_test.go diff --git a/pkg/router/router.go b/pkg/router/router.go index a8ef170..1c1c498 100644 --- a/pkg/router/router.go +++ b/pkg/router/router.go @@ -180,6 +180,15 @@ func Run(store *storage.Storage, dbEngine *model.DBEngine) error { v2ReposGroup.POST("/delete", repo.DeleteV2(dbEngine, store)) } + v2SearchTestGroup := v2Group.Group("/_search_test") + { + v2SearchTestGroup.POST("/index", search.CreateIndex(dbEngine)) + v2SearchTestGroup.DELETE("/index", search.DeleteIndex(dbEngine)) + + v2SearchTestGroup.POST("/docs", search.CreateDocs(dbEngine)) + v2SearchTestGroup.POST("/search", search.QueryDocs(dbEngine)) + } + v2UserNameGroup := v2Group.Group("/:username") { v2RepoGroup := v2UserNameGroup.Group("/:reponame") @@ -204,8 +213,6 @@ func Run(store *storage.Storage, dbEngine *model.DBEngine) error { } } } - v2Group.POST("/index", search.Index(dbEngine)) - v2Group.GET("/search", search.Query(dbEngine)) } } diff --git a/pkg/service/search/elastic_search.go b/pkg/service/search/elastic_search.go index 23f98c0..10cc172 100644 --- a/pkg/service/search/elastic_search.go +++ b/pkg/service/search/elastic_search.go @@ -1,8 +1,10 @@ package search import ( - "encoding/base64" + "encoding/json" "fmt" + "github.com/elastic/go-elasticsearch/v8/typedapi/indices/create" + log "github.com/sirupsen/logrus" "net/http" "github.com/adlternative/tinygithub/pkg/model" @@ -13,40 +15,191 @@ import ( "github.com/gin-gonic/gin" ) -const gitBlobInfoIndexName = "gitblob" +const gitBlobInfoIndexName = "gitblobs" -type gitBlobInfo struct { +type GitBlobInfo struct { RepoName string `json:"repoName"` Revision string `json:"revision"` FilePath string `json:"filePath"` BlobID string `json:"blobID"` Language string `json:"language"` - Contents []byte `json:"contents"` + Contents string `json:"contents"` } -func (g *gitBlobInfo) ID() string { +func (g *GitBlobInfo) ID() string { return fmt.Sprintf("%s:%s", g.RepoName, g.BlobID) } -func Index(db *model.DBEngine) gin.HandlerFunc { +func NewInSensitiveTextProperty() *types.TextProperty { + inSensitive := types.NewTextProperty() + + lowercase := "standard_lowercase" + inSensitive.Analyzer = &lowercase + + return inSensitive +} + +func NewTextProperty() *types.TextProperty { + // text + // match -> insensitive part ok + // term -> part ok(only lowercase ok) + // term -> full x + return types.NewTextProperty() +} + +func NewTextCaseProperty() *types.TextProperty { + property := types.NewTextProperty() + + keywordAnalyzer := "ngram_analyzer" + property.Analyzer = &keywordAnalyzer + return property +} + +func NewKeyWordsProperty() *types.KeywordProperty { + property := types.NewKeywordProperty() + ignoreAbove := 256 + property.IgnoreAbove = &ignoreAbove + return property +} + +func NewLowercaseKeyWordsProperty() *types.KeywordProperty { + property := types.NewKeywordProperty() + + normalizer := "lowercase_normalizer" + property.Normalizer = &normalizer + + ignoreAbove := 256 + property.IgnoreAbove = &ignoreAbove + + return property +} + +func NewLowercaseNormalizer() *types.CustomNormalizer { + property := types.NewCustomNormalizer() + filter := "lowercase" + + property.Filter = append(property.Filter, filter) + + return property +} + +func NewTextWithKeyWordsProperty() *types.TextProperty { + property := types.NewTextProperty() + property.Fields = map[string]types.Property{ + "keyword": NewKeyWordsProperty(), + } + return property +} + +func NewLowerCaseAnalyzer() *types.CustomAnalyzer { + standardLowercaseAnalyzer := types.NewCustomAnalyzer() + standardLowercaseAnalyzer.Tokenizer = "standard" + standardLowercaseAnalyzer.Filter = []string{"lowercase"} + return standardLowercaseAnalyzer +} + +func NewKeywordAnalyzer() *types.CustomAnalyzer { + standardLowercaseAnalyzer := types.NewCustomAnalyzer() + standardLowercaseAnalyzer.Tokenizer = "keyword" + standardLowercaseAnalyzer.Filter = []string{"lowercase"} + return standardLowercaseAnalyzer +} + +func NewIndexSettings() *types.IndexSettings { + setting := types.NewIndexSettings() + setting.Analysis = types.NewIndexSettingsAnalysis() + + ngramAnalyzer := types.NewCustomAnalyzer() + ngramAnalyzer.Tokenizer = "ngram_tokenizer" + + ngramTokenizer := types.NewNGramTokenizer() + ngramTokenizer.MinGram = 2 + ngramTokenizer.MaxGram = 3 + + setting.Analysis.Analyzer = map[string]types.Analyzer{ + "ngram_analyzer": ngramAnalyzer, + } + setting.Analysis.Tokenizer = map[string]types.Tokenizer{ + "ngram_tokenizer": ngramTokenizer, + } + setting.Analysis.Normalizer = map[string]types.Normalizer{ + "lowercase_normalizer": types.NewLowercaseNormalizer(), + } + + return setting +} + +func CreateIndex(db *model.DBEngine) gin.HandlerFunc { return func(c *gin.Context) { - var gitBlobInfo gitBlobInfo - if err := c.BindJSON(&gitBlobInfo); err != nil { - c.JSON(http.StatusBadRequest, gin.H{ + es, err := elasticsearch.NewTypedClient(elasticsearch.Config{}) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err.Error(), + }) + return + } + + res, err := es.Indices.Create(gitBlobInfoIndexName). + Request(&create.Request{ + Settings: NewIndexSettings(), + Mappings: &types.TypeMapping{ + Properties: map[string]types.Property{ + "blobID": NewKeyWordsProperty(), + "revision": NewKeyWordsProperty(), + "language": NewLowercaseKeyWordsProperty(), + "repoName": NewTextWithKeyWordsProperty(), + "filePath": NewTextWithKeyWordsProperty(), + + "contents": NewTextCaseProperty(), + }, + }, + }). + Do(c) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err.Error(), + }) + return + } + + c.JSON(http.StatusOK, res) + return + } +} + +func DeleteIndex(db *model.DBEngine) gin.HandlerFunc { + return func(c *gin.Context) { + es, err := elasticsearch.NewTypedClient(elasticsearch.Config{}) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ "error": err.Error(), }) return } - contents := make([]byte, base64.StdEncoding.DecodedLen(len(gitBlobInfo.Contents))) - _, err := base64.RawStdEncoding.Decode(contents, gitBlobInfo.Contents) + + res, err := es.Indices.Delete(gitBlobInfoIndexName).Do(c) if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err.Error(), + }) + return + } + + c.JSON(http.StatusOK, res) + return + } +} + +func CreateDocs(db *model.DBEngine) gin.HandlerFunc { + return func(c *gin.Context) { + var gitBlobInfo GitBlobInfo + if err := c.BindJSON(&gitBlobInfo); err != nil { c.JSON(http.StatusBadRequest, gin.H{ "error": err.Error(), }) return } - gitBlobInfo.Contents = contents es, err := elasticsearch.NewTypedClient(elasticsearch.Config{}) if err != nil { @@ -72,30 +225,60 @@ func Index(db *model.DBEngine) gin.HandlerFunc { } } -func Query(db *model.DBEngine) gin.HandlerFunc { +func QueryDocs(db *model.DBEngine) gin.HandlerFunc { return func(c *gin.Context) { - repoName := c.Query("repoName") - revision := c.Query("revision") - filePath := c.Query("filePath") - blobID := c.Query("blobID") - language := c.Query("language") - queryString := c.Query("query") + var gitBlobInfo GitBlobInfo + if err := c.BindJSON(&gitBlobInfo); err != nil { + c.JSON(http.StatusBadRequest, gin.H{ + "error": err.Error(), + }) + return + } + + log.Debugf("query info: %v", gitBlobInfo) es, err := elasticsearch.NewTypedClient(elasticsearch.Config{}) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{ - "error": err, + "error": err.Error(), }) return } boolQuery := types.BoolQuery{} - queryAppend(boolQuery.Filter, "repoName", repoName) - queryAppend(boolQuery.Filter, "filePath", filePath) - queryAppend(boolQuery.Filter, "revision", revision) - queryAppend(boolQuery.Must, "blobID", blobID) - queryAppend(boolQuery.Must, "language", language) - queryAppend(boolQuery.Must, "contents", queryString) + if gitBlobInfo.RepoName != "" { + boolQuery.Filter = append(boolQuery.Filter, types.Query{ + Term: map[string]types.TermQuery{"repoName.keyword": {Value: gitBlobInfo.RepoName}}, + }) + } + if gitBlobInfo.FilePath != "" { + boolQuery.Filter = append(boolQuery.Filter, types.Query{ + Term: map[string]types.TermQuery{"filePath.keyword": {Value: gitBlobInfo.FilePath}}, + }) + } + if gitBlobInfo.Revision != "" { + boolQuery.Filter = append(boolQuery.Filter, types.Query{ + Term: map[string]types.TermQuery{"revision": {Value: gitBlobInfo.Revision}}, + }) + } + if gitBlobInfo.BlobID != "" { + boolQuery.Filter = append(boolQuery.Filter, types.Query{ + Term: map[string]types.TermQuery{"blobID": {Value: gitBlobInfo.BlobID}}, + }) + } + if gitBlobInfo.Language != "" { + boolQuery.Filter = append(boolQuery.Filter, types.Query{ + Term: map[string]types.TermQuery{"language": {Value: gitBlobInfo.Language}}, + }) + } + if gitBlobInfo.Contents != "" { + boolQuery.Must = append(boolQuery.Must, + types.Query{ + Match: map[string]types.MatchQuery{ + "contents": {Query: gitBlobInfo.Contents}, + }, + }) + } res, err := es.Search().Index(gitBlobInfoIndexName).TrackTotalHits("true"). Request(&search.Request{ @@ -105,19 +288,26 @@ func Query(db *model.DBEngine) gin.HandlerFunc { }).Do(c) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{ - "error": err, + "error": err.Error(), }) return } - c.JSON(http.StatusOK, res) - } -} -func queryAppend(query []types.Query, prop, value string) []types.Query { - if value != "" { - query = append(query, types.Query{ - Term: map[string]types.TermQuery{prop: {Value: value}}, + gitblobs := []*GitBlobInfo{} + for _, hit := range res.Hits.Hits { + var result *GitBlobInfo + + err := json.Unmarshal(hit.Source_, &result) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": err.Error(), + }) + return + } + gitblobs = append(gitblobs, result) + } + c.JSON(http.StatusOK, gin.H{ + "gitblobs": gitblobs, }) } - return query } diff --git a/pkg/service/search/elastic_search_test.go b/pkg/service/search/elastic_search_test.go new file mode 100644 index 0000000..64ee82a --- /dev/null +++ b/pkg/service/search/elastic_search_test.go @@ -0,0 +1,129 @@ +package search + +import ( + "bytes" + "encoding/json" + "github.com/stretchr/testify/require" + "io" + "net/http" + "os" + "testing" +) + +func TestDeleteIndex(t *testing.T) { + r := require.New(t) + + // 创建 HTTP 请求 + req, err := http.NewRequest("DELETE", "http://localhost:8083/api/v2/_search_test/index", nil) + r.NoError(err) + + // 发送 HTTP 请求并处理响应 + client := &http.Client{} + resp, err := client.Do(req) + r.NoError(err) + + _, err = io.Copy(os.Stdout, resp.Body) + r.NoError(err) + + err = resp.Body.Close() + r.NoError(err) +} + +func TestCreateIndex(t *testing.T) { + r := require.New(t) + // 创建 HTTP 请求 + req, err := http.NewRequest("POST", "http://localhost:8083/api/v2/_search_test/index", nil) + r.NoError(err) + + // 发送 HTTP 请求并处理响应 + client := &http.Client{} + resp, err := client.Do(req) + r.NoError(err) + + _, err = io.Copy(os.Stdout, resp.Body) + r.NoError(err) + + err = resp.Body.Close() + r.NoError(err) +} + +func TestCreateDocs(t *testing.T) { + r := require.New(t) + + // 创建一个包含多个 GitBlobInfo 实例的切片 + blobs := []GitBlobInfo{ + { + RepoName: "my-repo", + Revision: "main", + FilePath: "path/to/file1", + BlobID: "abc123", + Language: "go", + Contents: "package main\n\nimport \"fmt\"\n\nfunc main() {\n fmt.Println(\"Hello, World!\")\n}", + }, + { + RepoName: "my-repo", + Revision: "dev", + FilePath: "path/to/file2", + BlobID: "def456", + Language: "python", + Contents: "print('Hello, World!')", + }, + { + RepoName: "my-repo", + Revision: "main", + FilePath: "path/to/file3", + BlobID: "ghi789", + Language: "java", + Contents: "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello, World!\");\n }\n}", + }, + } + + // 遍历 blobs 切片并发送 HTTP 请求 + for _, blob := range blobs { + jsonBlob, err := json.Marshal(blob) + r.NoError(err) + + // 创建 HTTP 请求 + req, err := http.NewRequest("POST", "http://localhost:8083/api/v2/_search_test/docs", bytes.NewBuffer(jsonBlob)) + r.NoError(err) + + req.Header.Set("Content-Type", "application/json") + + // 发送 HTTP 请求并处理响应 + client := &http.Client{} + resp, err := client.Do(req) + r.NoError(err) + + _, err = io.Copy(os.Stdout, resp.Body) + r.NoError(err) + + err = resp.Body.Close() + r.NoError(err) + } +} + +func TestQueryDocs(t *testing.T) { + r := require.New(t) + + blob := &GitBlobInfo{ + RepoName: "my-repo", + Contents: "main", + } + jsonBlob, err := json.Marshal(blob) + r.NoError(err) + + req, err := http.NewRequest("POST", "http://localhost:8083/api/v2/_search_test/search", bytes.NewBuffer(jsonBlob)) + r.NoError(err) + + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + r.NoError(err) + + _, err = io.Copy(os.Stdout, resp.Body) + r.NoError(err) + + err = resp.Body.Close() + r.NoError(err) +}