diff --git a/go.mod b/go.mod
index 4de6f7a..2dbbbaa 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/adlternative/tinygithub
 go 1.20
 
 require (
+	github.com/elastic/go-elasticsearch/v8 v8.4.0-alpha.1.0.20230525132218-317dd3ac737f
 	github.com/gin-contrib/sessions v0.0.5
 	github.com/gin-gonic/gin v1.9.0
 	github.com/git-lfs/pktline v0.0.0-20230103162542-ca444d533ef1
@@ -21,6 +22,7 @@ require (
 	github.com/bytedance/sonic v1.8.0 // indirect
 	github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6 // indirect
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
 	github.com/go-playground/locales v0.14.1 // indirect
diff --git a/go.sum b/go.sum
index e8658e3..b4b26d5 100644
--- a/go.sum
+++ b/go.sum
@@ -57,6 +57,10 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6 h1:1+44gxLdKRnR/Bx/iAtr+XqNcE4e0oODa63+FABNANI=
+github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6/go.mod h1:87Tcz8IVNe6rVSLdBux1o/PEItLtyabHU3naC7IoqKI=
+github.com/elastic/go-elasticsearch/v8 v8.4.0-alpha.1.0.20230525132218-317dd3ac737f h1:cBQtEqdUPkCYUU1RNqZ5G8QCIMBlFjuwnjgfoTaAcwo=
+github.com/elastic/go-elasticsearch/v8 v8.4.0-alpha.1.0.20230525132218-317dd3ac737f/go.mod h1:NGmpvohKiRHXI0Sw4fuUGn6hYOmAXlyCphKpzVBiqDE=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
diff --git a/pkg/router/router.go b/pkg/router/router.go
index 847f0b6..1c1c498 100644
--- a/pkg/router/router.go
+++ b/pkg/router/router.go
@@ -10,6 +10,7 @@ import (
 	"github.com/adlternative/tinygithub/pkg/service/home"
 	"github.com/adlternative/tinygithub/pkg/service/pack"
 	"github.com/adlternative/tinygithub/pkg/service/repo"
+	"github.com/adlternative/tinygithub/pkg/service/search"
 	"github.com/adlternative/tinygithub/pkg/service/tags"
 	"github.com/adlternative/tinygithub/pkg/service/tree"
 	"github.com/adlternative/tinygithub/pkg/service/user"
@@ -179,6 +180,15 @@ func Run(store *storage.Storage, dbEngine *model.DBEngine) error {
 				v2ReposGroup.POST("/delete", repo.DeleteV2(dbEngine, store))
 			}
 
+			v2SearchTestGroup := v2Group.Group("/_search_test")
+			{
+				v2SearchTestGroup.POST("/index", search.CreateIndex(dbEngine))
+				v2SearchTestGroup.DELETE("/index", search.DeleteIndex(dbEngine))
+
+				v2SearchTestGroup.POST("/docs", search.CreateDocs(dbEngine))
+				v2SearchTestGroup.POST("/search", search.QueryDocs(dbEngine))
+			}
+
 			v2UserNameGroup := v2Group.Group("/:username")
 			{
 				v2RepoGroup := v2UserNameGroup.Group("/:reponame")
@@ -203,9 +213,9 @@ func Run(store *storage.Storage, dbEngine *model.DBEngine) error {
 						}
 					}
 				}
-
 			}
 		}
+
 	}
 	err := r.SetTrustedProxies([]string{"127.0.0.1"})
 	if err != nil {
diff --git a/pkg/service/search/elastic_search.go b/pkg/service/search/elastic_search.go
new file mode 100644
index 0000000..10cc172
--- /dev/null
+++ b/pkg/service/search/elastic_search.go
@@ -0,0 +1,376 @@
+// Package search provides experimental Elasticsearch-backed HTTP handlers for
+// indexing and querying git blob contents.
+package search
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"sync"
+
+	"github.com/adlternative/tinygithub/pkg/model"
+	"github.com/elastic/go-elasticsearch/v8"
+	"github.com/elastic/go-elasticsearch/v8/typedapi/core/search"
+	"github.com/elastic/go-elasticsearch/v8/typedapi/indices/create"
+	"github.com/elastic/go-elasticsearch/v8/typedapi/types"
+	"github.com/elastic/go-elasticsearch/v8/typedapi/types/enums/refresh"
+	"github.com/gin-gonic/gin"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// gitBlobInfoIndexName is the index that stores GitBlobInfo documents.
+	gitBlobInfoIndexName = "gitblobs"
+
+	// Names of the analysis components registered by NewIndexSettings. The
+	// property constructors refer to them by name, so both sides must agree.
+	ngramAnalyzerName             = "ngram_analyzer"
+	ngramTokenizerName            = "ngram_tokenizer"
+	lowercaseNormalizerName       = "lowercase_normalizer"
+	standardLowercaseAnalyzerName = "standard_lowercase"
+
+	// keywordIgnoreAbove is the ignore_above limit applied to keyword fields.
+	keywordIgnoreAbove = 256
+)
+
+var (
+	// clientMu guards sharedClient.
+	clientMu sync.Mutex
+
+	// sharedClient is created lazily and reused by every handler so that the
+	// HTTP transport and its connection pool are shared between requests.
+	sharedClient *elasticsearch.TypedClient
+)
+
+// typedClient returns the shared Elasticsearch client, creating it with the
+// default configuration on first use. A failed attempt is not cached, so the
+// next call retries.
+func typedClient() (*elasticsearch.TypedClient, error) {
+	clientMu.Lock()
+	defer clientMu.Unlock()
+
+	if sharedClient != nil {
+		return sharedClient, nil
+	}
+	es, err := elasticsearch.NewTypedClient(elasticsearch.Config{})
+	if err != nil {
+		return nil, err
+	}
+	sharedClient = es
+	return sharedClient, nil
+}
+
+// respondError writes err as a JSON body of the form {"error": "..."} with
+// the given HTTP status.
+func respondError(c *gin.Context, status int, err error) {
+	c.JSON(status, gin.H{"error": err.Error()})
+}
+
+// GitBlobInfo is the document stored in Elasticsearch for one git blob. It is
+// also the JSON request body accepted by CreateDocs and QueryDocs.
+type GitBlobInfo struct {
+	RepoName string `json:"repoName"`
+	Revision string `json:"revision"`
+	FilePath string `json:"filePath"`
+	BlobID   string `json:"blobID"`
+	Language string `json:"language"`
+
+	Contents string `json:"contents"`
+}
+
+// ID returns the document ID in the form "<repoName>:<blobID>".
+//
+// NOTE(review): Revision and FilePath are not part of the ID, so indexing the
+// same blob of a repository under another path or revision reuses the same
+// document ID. Confirm that this is intended.
+func (g *GitBlobInfo) ID() string {
+	return fmt.Sprintf("%s:%s", g.RepoName, g.BlobID)
+}
+
+// NewInSensitiveTextProperty returns a text property that uses the
+// "standard_lowercase" analyzer registered by NewIndexSettings.
+func NewInSensitiveTextProperty() *types.TextProperty {
+	property := types.NewTextProperty()
+
+	analyzer := standardLowercaseAnalyzerName
+	property.Analyzer = &analyzer
+
+	return property
+}
+
+// NewTextProperty returns a text property with the library defaults.
+//
+// Author's notes on querying such a field:
+//   - match: case-insensitive partial matching works
+//   - term: partial matching works, lowercase input only
+//   - term: matching the full value does not work
+func NewTextProperty() *types.TextProperty {
+	return types.NewTextProperty()
+}
+
+// NewTextCaseProperty returns a text property that uses the "ngram_analyzer"
+// analyzer registered by NewIndexSettings.
+func NewTextCaseProperty() *types.TextProperty {
+	property := types.NewTextProperty()
+
+	analyzer := ngramAnalyzerName
+	property.Analyzer = &analyzer
+
+	return property
+}
+
+// NewKeyWordsProperty returns a keyword property with ignore_above set to 256.
+func NewKeyWordsProperty() *types.KeywordProperty {
+	property := types.NewKeywordProperty()
+
+	ignoreAbove := keywordIgnoreAbove
+	property.IgnoreAbove = &ignoreAbove
+
+	return property
+}
+
+// NewLowercaseKeyWordsProperty returns a keyword property with ignore_above
+// set to 256 that uses the "lowercase_normalizer" normalizer registered by
+// NewIndexSettings.
+func NewLowercaseKeyWordsProperty() *types.KeywordProperty {
+	property := NewKeyWordsProperty()
+
+	normalizer := lowercaseNormalizerName
+	property.Normalizer = &normalizer
+
+	return property
+}
+
+// NewLowercaseNormalizer returns a custom normalizer whose only filter is
+// "lowercase".
+func NewLowercaseNormalizer() *types.CustomNormalizer {
+	normalizer := types.NewCustomNormalizer()
+	normalizer.Filter = append(normalizer.Filter, "lowercase")
+	return normalizer
+}
+
+// NewTextWithKeyWordsProperty returns a text property with a "keyword"
+// sub-field built by NewKeyWordsProperty.
+func NewTextWithKeyWordsProperty() *types.TextProperty {
+	property := types.NewTextProperty()
+	property.Fields = map[string]types.Property{
+		"keyword": NewKeyWordsProperty(),
+	}
+	return property
+}
+
+// NewLowerCaseAnalyzer returns a custom analyzer that combines the "standard"
+// tokenizer with the "lowercase" filter.
+func NewLowerCaseAnalyzer() *types.CustomAnalyzer {
+	analyzer := types.NewCustomAnalyzer()
+	analyzer.Tokenizer = "standard"
+	analyzer.Filter = []string{"lowercase"}
+	return analyzer
+}
+
+// NewKeywordAnalyzer returns a custom analyzer that combines the "keyword"
+// tokenizer with the "lowercase" filter.
+func NewKeywordAnalyzer() *types.CustomAnalyzer {
+	analyzer := types.NewCustomAnalyzer()
+	analyzer.Tokenizer = "keyword"
+	analyzer.Filter = []string{"lowercase"}
+	return analyzer
+}
+
+// NewIndexSettings returns the index settings for the gitblobs index. It
+// registers every analyzer, tokenizer and normalizer that the property
+// constructors in this file refer to by name.
+func NewIndexSettings() *types.IndexSettings {
+	ngramAnalyzer := types.NewCustomAnalyzer()
+	ngramAnalyzer.Tokenizer = ngramTokenizerName
+
+	ngramTokenizer := types.NewNGramTokenizer()
+	ngramTokenizer.MinGram = 2
+	ngramTokenizer.MaxGram = 3
+
+	settings := types.NewIndexSettings()
+	settings.Analysis = types.NewIndexSettingsAnalysis()
+	// standard_lowercase is registered so that NewInSensitiveTextProperty
+	// refers to an analyzer that exists in the index.
+	settings.Analysis.Analyzer = map[string]types.Analyzer{
+		ngramAnalyzerName:             ngramAnalyzer,
+		standardLowercaseAnalyzerName: NewLowerCaseAnalyzer(),
+	}
+	settings.Analysis.Tokenizer = map[string]types.Tokenizer{
+		ngramTokenizerName: ngramTokenizer,
+	}
+	settings.Analysis.Normalizer = map[string]types.Normalizer{
+		lowercaseNormalizerName: types.NewLowercaseNormalizer(),
+	}
+
+	return settings
+}
+
+// CreateIndex returns a handler that creates the gitblobs index with the
+// settings from NewIndexSettings and the field mappings for GitBlobInfo. It
+// responds with the Elasticsearch response on success and with a JSON error
+// and status 500 on failure. db is currently unused.
+func CreateIndex(db *model.DBEngine) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		es, err := typedClient()
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		// Pass the request context so the call is cancelled together with
+		// the incoming HTTP request.
+		res, err := es.Indices.Create(gitBlobInfoIndexName).
+			Request(&create.Request{
+				Settings: NewIndexSettings(),
+				Mappings: &types.TypeMapping{
+					Properties: map[string]types.Property{
+						"blobID":   NewKeyWordsProperty(),
+						"revision": NewKeyWordsProperty(),
+						"language": NewLowercaseKeyWordsProperty(),
+						"repoName": NewTextWithKeyWordsProperty(),
+						"filePath": NewTextWithKeyWordsProperty(),
+
+						"contents": NewTextCaseProperty(),
+					},
+				},
+			}).
+			Do(c.Request.Context())
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		c.JSON(http.StatusOK, res)
+	}
+}
+
+// DeleteIndex returns a handler that deletes the gitblobs index. It responds
+// with the Elasticsearch response on success and with a JSON error and status
+// 500 on failure. db is currently unused.
+func DeleteIndex(db *model.DBEngine) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		es, err := typedClient()
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		res, err := es.Indices.Delete(gitBlobInfoIndexName).Do(c.Request.Context())
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		c.JSON(http.StatusOK, res)
+	}
+}
+
+// CreateDocs returns a handler that indexes the GitBlobInfo sent as the JSON
+// request body under the ID from GitBlobInfo.ID, waiting for the next index
+// refresh before responding. A malformed body yields status 400, any other
+// failure status 500. db is currently unused.
+func CreateDocs(db *model.DBEngine) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// ShouldBindJSON leaves the error response to this handler; BindJSON
+		// would abort the request with its own 400 before c.JSON runs.
+		var blob GitBlobInfo
+		if err := c.ShouldBindJSON(&blob); err != nil {
+			respondError(c, http.StatusBadRequest, err)
+			return
+		}
+
+		es, err := typedClient()
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		res, err := es.Index(gitBlobInfoIndexName).
+			Request(&blob).
+			Id(blob.ID()).
+			Refresh(refresh.Waitfor).
+			Do(c.Request.Context())
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		c.JSON(http.StatusOK, res)
+	}
+}
+
+// QueryDocs returns a handler that searches the gitblobs index. The JSON
+// request body is a GitBlobInfo used as a query template: every non-empty
+// field except Contents becomes an exact-match term filter, and a non-empty
+// Contents becomes a match query on the "contents" field. The matching
+// documents are returned as {"gitblobs": [...]}. A malformed body yields
+// status 400, any other failure status 500. db is currently unused.
+func QueryDocs(db *model.DBEngine) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		var query GitBlobInfo
+		if err := c.ShouldBindJSON(&query); err != nil {
+			respondError(c, http.StatusBadRequest, err)
+			return
+		}
+
+		log.Debugf("query info: %v", query)
+
+		es, err := typedClient()
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		// Term filters are applied in this fixed order; empty values are
+		// skipped so that they do not restrict the search.
+		termFilters := []struct{ field, value string }{
+			{"repoName.keyword", query.RepoName},
+			{"filePath.keyword", query.FilePath},
+			{"revision", query.Revision},
+			{"blobID", query.BlobID},
+			{"language", query.Language},
+		}
+
+		var boolQuery types.BoolQuery
+		for _, f := range termFilters {
+			if f.value == "" {
+				continue
+			}
+			boolQuery.Filter = append(boolQuery.Filter, types.Query{
+				Term: map[string]types.TermQuery{f.field: {Value: f.value}},
+			})
+		}
+		if query.Contents != "" {
+			boolQuery.Must = append(boolQuery.Must, types.Query{
+				Match: map[string]types.MatchQuery{
+					"contents": {Query: query.Contents},
+				},
+			})
+		}
+
+		res, err := es.Search().
+			Index(gitBlobInfoIndexName).
+			TrackTotalHits("true").
+			Request(&search.Request{
+				Query: &types.Query{Bool: &boolQuery},
+			}).
+			Do(c.Request.Context())
+		if err != nil {
+			respondError(c, http.StatusInternalServerError, err)
+			return
+		}
+
+		// Allocate a non-nil slice so that an empty result encodes as []
+		// rather than null.
+		gitblobs := make([]*GitBlobInfo, 0, len(res.Hits.Hits))
+		for _, hit := range res.Hits.Hits {
+			blob := new(GitBlobInfo)
+			if err := json.Unmarshal(hit.Source_, blob); err != nil {
+				respondError(c, http.StatusInternalServerError, err)
+				return
+			}
+			gitblobs = append(gitblobs, blob)
+		}
+		c.JSON(http.StatusOK, gin.H{"gitblobs": gitblobs})
+	}
+}
diff --git a/pkg/service/search/elastic_search_test.go b/pkg/service/search/elastic_search_test.go
new file mode 100644
index 0000000..64ee82a
--- /dev/null
+++ b/pkg/service/search/elastic_search_test.go
@@ -0,0 +1,107 @@
+package search
+
+import (
+	"bytes"
+	"encoding/json"
+	"io"
+	"net/http"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+// searchTestBaseURL is the base URL of the search test endpoints. These are
+// integration tests: they send requests to a server expected to be listening
+// on localhost:8083.
+const searchTestBaseURL = "http://localhost:8083/api/v2/_search_test"
+
+// testClient is shared by all tests. The timeout keeps an unresponsive server
+// from blocking the test run indefinitely.
+var testClient = &http.Client{Timeout: 30 * time.Second}
+
+// sendRequest sends a request with the given method to path below
+// searchTestBaseURL and copies the response body to stdout. A non-nil payload
+// is sent as the JSON request body. The response status is not checked.
+func sendRequest(t *testing.T, method, path string, payload any) {
+	t.Helper()
+	r := require.New(t)
+
+	var body io.Reader
+	if payload != nil {
+		encoded, err := json.Marshal(payload)
+		r.NoError(err)
+		body = bytes.NewReader(encoded)
+	}
+
+	req, err := http.NewRequest(method, searchTestBaseURL+path, body)
+	r.NoError(err)
+	if payload != nil {
+		req.Header.Set("Content-Type", "application/json")
+	}
+
+	resp, err := testClient.Do(req)
+	r.NoError(err)
+	// Close the body on every path, including a failed copy below.
+	defer func() {
+		if err := resp.Body.Close(); err != nil {
+			t.Errorf("closing response body: %v", err)
+		}
+	}()
+
+	_, err = io.Copy(os.Stdout, resp.Body)
+	r.NoError(err)
+}
+
+// TestDeleteIndex sends a DELETE request to the index endpoint.
+func TestDeleteIndex(t *testing.T) {
+	sendRequest(t, http.MethodDelete, "/index", nil)
+}
+
+// TestCreateIndex sends a POST request to the index endpoint.
+func TestCreateIndex(t *testing.T) {
+	sendRequest(t, http.MethodPost, "/index", nil)
+}
+
+// TestCreateDocs posts several sample blobs to the docs endpoint.
+func TestCreateDocs(t *testing.T) {
+	blobs := []GitBlobInfo{
+		{
+			RepoName: "my-repo",
+			Revision: "main",
+			FilePath: "path/to/file1",
+			BlobID:   "abc123",
+			Language: "go",
+			Contents: "package main\n\nimport \"fmt\"\n\nfunc main() {\n fmt.Println(\"Hello, World!\")\n}",
+		},
+		{
+			RepoName: "my-repo",
+			Revision: "dev",
+			FilePath: "path/to/file2",
+			BlobID:   "def456",
+			Language: "python",
+			Contents: "print('Hello, World!')",
+		},
+		{
+			RepoName: "my-repo",
+			Revision: "main",
+			FilePath: "path/to/file3",
+			BlobID:   "ghi789",
+			Language: "java",
+			Contents: "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello, World!\");\n }\n}",
+		},
+	}
+
+	for _, blob := range blobs {
+		sendRequest(t, http.MethodPost, "/docs", blob)
+	}
+}
+
+// TestQueryDocs searches for blobs of "my-repo" whose contents match "main".
+func TestQueryDocs(t *testing.T) {
+	sendRequest(t, http.MethodPost, "/search", &GitBlobInfo{
+		RepoName: "my-repo",
+		Contents: "main",
+	})
+}