Работа с OpenSearch в Go для системы управления знаниями

В этом руководстве я покажу, как реализовать систему знаний с таксономиями в OpenSearch с использованием Go, включая сложные запросы и построение дерева знаний.

Подготовка структуры данных

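Сначала определим структуру документа для нашей базы знаний (в итоговом файле это объявление должно располагаться после `package main` и блока импортов, так как использует пакет `time`):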

// KnowledgeDocument is a single knowledge-base article. The struct tags define
// the JSON keys used when the document is marshaled for indexing and when a
// search hit's _source is decoded back into it.
type KnowledgeDocument struct {
    // ID is serialized as "id"; addDocument also passes it as the document ID
    // of the index request.
    ID          string    `json:"id"`
    // Title and Content hold the article text; createKnowledgeIndex maps both
    // as analyzed "text" fields.
    Title       string    `json:"title"`
    Content     string    `json:"content"`
    // Category, Tags, Benefits, Locations and LevelUser are the taxonomy
    // fields; createKnowledgeIndex maps each of them as "keyword".
    Category    string    `json:"category"`
    Tags        []string  `json:"tags"`
    Benefits    []string  `json:"benefits"`
    Locations   []string  `json:"locations"`
    LevelUser   string    `json:"level_user"`
    // CreatedAt and UpdatedAt are mapped as "date" fields.
    CreatedAt   time.Time `json:"created_at"`
    UpdatedAt   time.Time `json:"updated_at"`
    // RelatedDocs is mapped as "keyword"; presumably it holds the IDs of
    // related documents — TODO confirm against callers.
    RelatedDocs []string  `json:"related_docs"`
    // Solutions is mapped as a "nested" field. The element type is an
    // anonymous struct, so composite literals (see main) must repeat the
    // exact same struct type, tags included.
    Solutions   []struct {
        Description string   `json:"description"`
        Steps       []string `json:"steps"`
    } `json:"solutions"`
}

Настройка клиента OpenSearch в Go

package main

import (
    "bytes"
    "context"
    "crypto/tls"
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "strings"
    "time"

    "github.com/opensearch-project/opensearch-go"
    "github.com/opensearch-project/opensearch-go/opensearchapi"
)

// getOpenSearchClient returns a client for the node at https://localhost:9200
// that authenticates with basic auth (admin/admin). If the client cannot be
// constructed the process is terminated through log.Fatalf.
//
// NOTE(review): credentials are hard-coded and TLS certificate verification is
// switched off; this looks intended for a local development cluster only —
// confirm before using it anywhere else.
func getOpenSearchClient() *opensearch.Client {
    // InsecureSkipVerify makes the transport accept any server certificate.
    transport := &http.Transport{
        TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
    }

    client, err := opensearch.NewClient(opensearch.Config{
        Addresses: []string{"https://localhost:9200"},
        Username:  "admin",
        Password:  "admin",
        Transport: transport,
    })
    if err != nil {
        log.Fatalf("Error creating the client: %s", err)
    }

    return client
}

Создание индекса с маппингом

// createKnowledgeIndex creates the "knowledge_base" index.
//
// The request body configures one shard and one replica, declares
// "custom_analyzer" (standard tokenizer followed by the "lowercase" and
// "stemmer" filters) and maps the document fields: "title" (with a "keyword"
// sub-field) and "content" as analyzed text, the taxonomy fields as
// "keyword", the timestamps as "date" and "solutions" as a "nested" object
// with analyzed "description" and "steps".
//
// The call is idempotent: if the server answers that the index already exists
// (resource_already_exists_exception) the function returns nil so that the
// program can be started more than once. In that case the existing index is
// left untouched; its settings and mapping are neither compared nor updated.
// Any other error response, or a transport failure, is returned as an error.
//
// NOTE(review): the "stemmer" filter is configured without a language, which
// presumably selects the default (English) stemmer — confirm whether that is
// adequate for the Russian text indexed by main.
func createKnowledgeIndex(client *opensearch.Client) error {
    mapping := `
    {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 1
            },
            "analysis": {
                "analyzer": {
                    "custom_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase", "stemmer"]
                    }
                }
            }
        },
        "mappings": {
            "properties": {
                "title": {
                    "type": "text",
                    "analyzer": "custom_analyzer",
                    "fields": {
                        "keyword": {
                            "type": "keyword"
                        }
                    }
                },
                "content": {
                    "type": "text",
                    "analyzer": "custom_analyzer"
                },
                "category": {
                    "type": "keyword"
                },
                "tags": {
                    "type": "keyword"
                },
                "benefits": {
                    "type": "keyword"
                },
                "locations": {
                    "type": "keyword"
                },
                "level_user": {
                    "type": "keyword"
                },
                "created_at": {
                    "type": "date"
                },
                "updated_at": {
                    "type": "date"
                },
                "related_docs": {
                    "type": "keyword"
                },
                "solutions": {
                    "type": "nested",
                    "properties": {
                        "description": {
                            "type": "text",
                            "analyzer": "custom_analyzer"
                        },
                        "steps": {
                            "type": "text",
                            "analyzer": "custom_analyzer"
                        }
                    }
                }
            }
        }
    }`

    req := opensearchapi.IndicesCreateRequest{
        Index: "knowledge_base",
        Body:  strings.NewReader(mapping),
    }

    res, err := req.Do(context.Background(), client)
    if err != nil {
        return fmt.Errorf("error creating index: %w", err)
    }
    defer res.Body.Close()

    if res.IsError() {
        // Render the response once and reuse the text for both the
        // "already exists" check and the error message.
        msg := res.String()
        if strings.Contains(msg, "resource_already_exists_exception") {
            // The index was created by an earlier run; nothing to do.
            return nil
        }
        return fmt.Errorf("error response: %s", msg)
    }

    return nil
}

Добавление документа в базу знаний

// addDocument marshals doc to JSON and indexes it into "knowledge_base",
// using doc.ID as the document ID and requesting a refresh with the write.
//
// A zero CreatedAt or UpdatedAt is replaced with the current UTC time before
// marshaling, so documents built without timestamps are not stored with the
// zero time (year 0001). doc is received by value, so the caller's copy is
// not modified. Timestamps that the caller did set are kept as they are.
//
// It returns an error if marshaling fails, if the request cannot be executed,
// or if OpenSearch answers with an error status.
func addDocument(client *opensearch.Client, doc KnowledgeDocument) error {
    now := time.Now().UTC()
    if doc.CreatedAt.IsZero() {
        doc.CreatedAt = now
    }
    if doc.UpdatedAt.IsZero() {
        doc.UpdatedAt = now
    }

    payload, err := json.Marshal(doc)
    if err != nil {
        return fmt.Errorf("error marshaling document: %w", err)
    }

    req := opensearchapi.IndexRequest{
        Index: "knowledge_base",
        // Read straight from the marshaled bytes; no string copy is needed.
        Body:       bytes.NewReader(payload),
        DocumentID: doc.ID,
        // NOTE(review): refreshing on every write is presumably there so that
        // the searches issued right afterwards see the document — confirm
        // before using this for bulk loading.
        Refresh: "true",
    }

    res, err := req.Do(context.Background(), client)
    if err != nil {
        return fmt.Errorf("error indexing document: %w", err)
    }
    defer res.Body.Close()

    if res.IsError() {
        return fmt.Errorf("error response: %s", res.String())
    }

    return nil
}

Пример сложного поиска по таксономиям

// searchDocuments runs a full-text search for query in "knowledge_base",
// restricted by the given taxonomy filters, and returns up to 10 matching
// documents decoded from the hits' _source.
//
// filters maps a field name to the value it must have. A scalar value becomes
// a "term" filter; a []string or []interface{} value becomes a "terms" filter
// (match any of the values).
//
// It returns an error if the query cannot be encoded, the request fails,
// OpenSearch answers with an error status, or the response cannot be decoded.
func searchDocuments(client *opensearch.Client, query string, filters map[string]interface{}) ([]KnowledgeDocument, error) {
    var buf bytes.Buffer
    if err := json.NewEncoder(&buf).Encode(buildTaxonomySearchQuery(query, filters)); err != nil {
        return nil, fmt.Errorf("error encoding query: %w", err)
    }

    res, err := client.Search(
        client.Search.WithContext(context.Background()),
        client.Search.WithIndex("knowledge_base"),
        client.Search.WithBody(&buf),
        client.Search.WithTrackTotalHits(true),
    )
    if err != nil {
        return nil, fmt.Errorf("error executing search: %w", err)
    }
    defer res.Body.Close()

    if res.IsError() {
        return nil, fmt.Errorf("error response: %s", res.String())
    }

    // Only the _source of each hit is needed.
    var result struct {
        Hits struct {
            Hits []struct {
                Source KnowledgeDocument `json:"_source"`
            } `json:"hits"`
        } `json:"hits"`
    }

    if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
        return nil, fmt.Errorf("error parsing the response body: %w", err)
    }

    var docs []KnowledgeDocument
    for _, hit := range result.Hits.Hits {
        docs = append(docs, hit.Source)
    }

    return docs, nil
}

// buildTaxonomySearchQuery assembles the search request body used by
// searchDocuments: a text match on the article or on its nested solutions,
// combined with one filter clause per entry of filters.
func buildTaxonomySearchQuery(query string, filters map[string]interface{}) map[string]interface{} {
    // Start from an empty, non-nil slice so that "filter" is encoded as [].
    filterClauses := make([]map[string]interface{}, 0, len(filters))
    for field, value := range filters {
        clause := "term"
        switch value.(type) {
        case []string, []interface{}:
            // A list of accepted values needs "terms" rather than "term".
            clause = "terms"
        }
        filterClauses = append(filterClauses, map[string]interface{}{
            clause: map[string]interface{}{field: value},
        })
    }

    // "solutions" is mapped as a nested field, so its sub-fields have to be
    // queried through a nested query; they are kept out of the top-level
    // multi_match for that reason.
    textClauses := []map[string]interface{}{
        {
            "multi_match": map[string]interface{}{
                "query":  query,
                "fields": []string{"title^3", "content"},
            },
        },
        {
            "nested": map[string]interface{}{
                "path": "solutions",
                "query": map[string]interface{}{
                    "multi_match": map[string]interface{}{
                        "query":  query,
                        "fields": []string{"solutions.description", "solutions.steps"},
                    },
                },
            },
        },
    }

    return map[string]interface{}{
        "query": map[string]interface{}{
            "bool": map[string]interface{}{
                "must": []map[string]interface{}{
                    {
                        // The text has to match in the article itself or in
                        // at least one of its solutions.
                        "bool": map[string]interface{}{
                            "should":               textClauses,
                            "minimum_should_match": 1,
                        },
                    },
                },
                "filter": filterClauses,
            },
        },
        "size": 10,
    }
}

Обработка сложных запросов (например, диагностика проблемы с принтером)

// troubleshootPrinterIssue searches "knowledge_base" for articles in the
// "printer" category whose text matches problemDescription and returns up to
// 5 documents decoded from the hits' _source.
//
// A document must match the description either in its title/content or in at
// least one of its nested solutions, and must carry at least one of the tags
// "streaks", "cartridge" or "printing_quality".
//
// It returns an error if the query cannot be encoded, the request fails,
// OpenSearch answers with an error status, or the response cannot be decoded.
func troubleshootPrinterIssue(client *opensearch.Client, problemDescription string) ([]KnowledgeDocument, error) {
    // "solutions" is mapped as a nested field, so its sub-fields have to be
    // queried through a nested query; they are kept out of the top-level
    // multi_match for that reason.
    descriptionMatch := map[string]interface{}{
        "bool": map[string]interface{}{
            "should": []map[string]interface{}{
                {
                    "multi_match": map[string]interface{}{
                        "query":  problemDescription,
                        "fields": []string{"title^3", "content"},
                    },
                },
                {
                    "nested": map[string]interface{}{
                        "path": "solutions",
                        "query": map[string]interface{}{
                            "multi_match": map[string]interface{}{
                                "query":  problemDescription,
                                "fields": []string{"solutions.description", "solutions.steps"},
                            },
                        },
                    },
                },
            },
            "minimum_should_match": 1,
        },
    }

    // One optional clause per tag; at least one of them has to match.
    var tagClauses []map[string]interface{}
    for _, tag := range []string{"streaks", "cartridge", "printing_quality"} {
        tagClauses = append(tagClauses, map[string]interface{}{
            "match": map[string]interface{}{
                "tags": tag,
            },
        })
    }

    query := map[string]interface{}{
        "query": map[string]interface{}{
            "bool": map[string]interface{}{
                "must": []map[string]interface{}{
                    {
                        "match": map[string]interface{}{
                            "category": "printer",
                        },
                    },
                    descriptionMatch,
                },
                "should":               tagClauses,
                "minimum_should_match": 1,
            },
        },
        "size": 5,
    }

    var buf bytes.Buffer
    if err := json.NewEncoder(&buf).Encode(query); err != nil {
        return nil, fmt.Errorf("error encoding query: %w", err)
    }

    res, err := client.Search(
        client.Search.WithContext(context.Background()),
        client.Search.WithIndex("knowledge_base"),
        client.Search.WithBody(&buf),
        client.Search.WithTrackTotalHits(true),
    )
    if err != nil {
        return nil, fmt.Errorf("error executing search: %w", err)
    }
    defer res.Body.Close()

    if res.IsError() {
        return nil, fmt.Errorf("error response: %s", res.String())
    }

    // Only the _source of each hit is needed.
    var result struct {
        Hits struct {
            Hits []struct {
                Source KnowledgeDocument `json:"_source"`
            } `json:"hits"`
        } `json:"hits"`
    }

    if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
        return nil, fmt.Errorf("error parsing the response body: %w", err)
    }

    var docs []KnowledgeDocument
    for _, hit := range result.Hits.Hits {
        docs = append(docs, hit.Source)
    }

    return docs, nil
}

Построение дерева знаний

// buildKnowledgeTree summarizes the documents of rootCategory in
// "knowledge_base" using aggregations only (no hits are requested).
//
// The returned map has two keys:
//   - "category": rootCategory
//   - "subcategories": a []map[string]interface{} with one entry per category
//     bucket (at most 10), each holding "name" (string), "count" (int),
//     "popularTags" ([]string, at most 5) and "documents"
//     ([]map[string]string with "title" and "id", at most 3).
//
// depth is accepted for interface compatibility but is not used: the query
// produces a single level of buckets.
//
// NOTE(review): the buckets are taken from the same "category" field that the
// query filters on, so only rootCategory itself is expected to appear as a
// bucket — confirm whether a separate sub-category field was intended.
//
// It returns an error if the query cannot be encoded, the request fails,
// OpenSearch answers with an error status, or the response cannot be decoded.
func buildKnowledgeTree(client *opensearch.Client, rootCategory string, depth int) (map[string]interface{}, error) {
    // "category" and "tags" are mapped directly as keyword fields by
    // createKnowledgeIndex; there is no ".keyword" sub-field on them, so the
    // plain field names must be used for the term query and the aggregations.
    query := map[string]interface{}{
        "size": 0,
        "query": map[string]interface{}{
            "term": map[string]interface{}{
                "category": rootCategory,
            },
        },
        "aggs": map[string]interface{}{
            "subcategories": map[string]interface{}{
                "terms": map[string]interface{}{
                    "field": "category",
                    "size":  10,
                },
                "aggs": map[string]interface{}{
                    "related_tags": map[string]interface{}{
                        "terms": map[string]interface{}{
                            "field": "tags",
                            "size":  5,
                        },
                    },
                    "top_documents": map[string]interface{}{
                        "top_hits": map[string]interface{}{
                            "size": 3,
                            "_source": map[string]interface{}{
                                "includes": []string{"title", "id"},
                            },
                        },
                    },
                },
            },
        },
    }

    var buf bytes.Buffer
    if err := json.NewEncoder(&buf).Encode(query); err != nil {
        return nil, fmt.Errorf("error encoding query: %w", err)
    }

    res, err := client.Search(
        client.Search.WithContext(context.Background()),
        client.Search.WithIndex("knowledge_base"),
        client.Search.WithBody(&buf),
    )
    if err != nil {
        return nil, fmt.Errorf("error executing search: %w", err)
    }
    defer res.Body.Close()

    if res.IsError() {
        return nil, fmt.Errorf("error response: %s", res.String())
    }

    // Mirrors the part of the aggregation response that is read below.
    var result struct {
        Aggregations struct {
            Subcategories struct {
                Buckets []struct {
                    Key         string `json:"key"`
                    DocCount    int    `json:"doc_count"`
                    RelatedTags struct {
                        Buckets []struct {
                            Key string `json:"key"`
                        } `json:"buckets"`
                    } `json:"related_tags"`
                    TopDocuments struct {
                        Hits struct {
                            Hits []struct {
                                Source struct {
                                    Title string `json:"title"`
                                    ID    string `json:"id"`
                                } `json:"_source"`
                            } `json:"hits"`
                        } `json:"hits"`
                    } `json:"top_documents"`
                } `json:"buckets"`
            } `json:"subcategories"`
        } `json:"aggregations"`
    }

    if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
        return nil, fmt.Errorf("error parsing the response body: %w", err)
    }

    var subcategories []map[string]interface{}
    for _, bucket := range result.Aggregations.Subcategories.Buckets {
        // Build the typed slices first and store them once; they stay
        // non-nil even when a bucket has no tags or documents.
        tags := make([]string, 0, len(bucket.RelatedTags.Buckets))
        for _, tag := range bucket.RelatedTags.Buckets {
            tags = append(tags, tag.Key)
        }

        documents := make([]map[string]string, 0, len(bucket.TopDocuments.Hits.Hits))
        for _, doc := range bucket.TopDocuments.Hits.Hits {
            documents = append(documents, map[string]string{
                "title": doc.Source.Title,
                "id":    doc.Source.ID,
            })
        }

        subcategories = append(subcategories, map[string]interface{}{
            "name":        bucket.Key,
            "count":       bucket.DocCount,
            "popularTags": tags,
            "documents":   documents,
        })
    }

    return map[string]interface{}{
        "category":      rootCategory,
        "subcategories": subcategories,
    }, nil
}

Пример использования

func main() {
    client := getOpenSearchClient()
    
    // Создаем индекс (один раз)
    if err := createKnowledgeIndex(client); err != nil {
        log.Fatalf("Error creating index: %v", err)
    }
    
    // Добавляем документ о проблемах с принтером
    doc := KnowledgeDocument{
        ID:        "printer_streaks_1",
        Title:     "Принтер печатает с полосами",
        Content:   "Если принтер печатает с полосами, даже с новым картриджем, возможные причины...",
        Category:  "printer",
        Tags:      []string{"streaks", "printing_quality", "cartridge"},
        Benefits:  []string{"troubleshooting", "repair"},
        Locations: []string{"office", "home"},
        LevelUser: "beginner",
        Solutions: []struct {
            Description string   `json:"description"`
            Steps       []string `json:"steps"`
        }{
            {
                Description: "Очистка печатающей головки",
                Steps: []string{
                    "Зайти в настройки принтера",
                    "Найти раздел обслуживания",
                    "Выбрать опцию очистки печатающей головки",
                },
            },
            {
                Description: "Проверка выравнивания картриджей",
                Steps: []string{
                    "Заменить картридж",
                    "Запустить процедуру выравнивания",
                },
            },
        },
    }
    
    if err := addDocument(client, doc); err != nil {
        log.Fatalf("Error adding document: %v", err)
    }
    
    // Поиск решения проблемы
    solutions, err := troubleshootPrinterIssue(client, "сломался принтер, печатает с полосой, картридж новый")
    if err != nil {
        log.Fatalf("Error searching: %v", err)
    }
    
    fmt.Println("Найдены решения:")
    for _, sol := range solutions {
        fmt.Printf("- %s\n", sol.Title)
        for _, solution := range sol.Solutions {
            fmt.Printf("  %s\n", solution.Description)
            for i, step := range solution.Steps {
                fmt.Printf("    %d. %s\n", i+1, step)
            }
        }
    }
    
    // Построение дерева знаний
    tree, err := buildKnowledgeTree(client, "printer", 2)
    if err != nil {
        log.Fatalf("Error building knowledge tree: %v", err)
    }
    
    fmt.Println("\nДерево знаний:")
    fmt.Printf("Категория: %s\n", tree["category"])
    for _, subcat := range tree["subcategories"].([]map[string]interface{}) {
        fmt.Printf("  Подкатегория: %s (%d документов)\n", subcat["name"], subcat["count"])
        fmt.Printf("    Популярные теги: %v\n", subcat["popularTags"])
    }
}