SolrでConflの記事を検索 - インデクシングCLI編
「SolrでConflの記事を検索 - それでも気分は高専生」の個別実装編第一弾です。
ConflではREST APIを通じて、様々な記事をJSON形式で取得することができます。
#!/bin/bash
# Fetch one page of Confl content as JSON through the REST API.
# Prompts for the account, the password and the start offset, then issues a
# single authenticated GET with curl.
# bash is required: `read -p` and `read -s` are not available in POSIX sh.

ENDPOINT="conflのホスト名"
LIMIT=30
TYPE=page
EXPAND=space,history,body.view,metadata.labels
# Maximum time in seconds for the whole transfer (curl -m). It must be set:
# with an empty value, -m would consume the URL as its argument.
TIMEOUT=30

read -p "Account: " ACCOUNT
read -s -p "Password: " PASSWORD
echo # the silent read does not echo the newline typed by the user
read -p "from(int): " FROM

curl -X GET -m "${TIMEOUT}" \
  "https://${ENDPOINT}?type=${TYPE}&start=${FROM}&limit=${LIMIT}&expand=${EXPAND}" \
  -u "${ACCOUNT}:${PASSWORD}" \
  -H 'Accept: application/json'
// Space holds the "space" object of a Confl content item.
type Space struct {
	Name string `json:"name"`
}

// History holds the "history" object of a Confl content item; only the
// creator's user name and display name are decoded.
type History struct {
	CreatedBy struct {
		UserName    string `json:"username"`
		DisplayName string `json:"displayName"`
	} `json:"createdBy"`
}

// Body holds the "body" object of a Confl content item; only the value of the
// "view" representation is decoded.
type Body struct {
	View struct {
		Value string `json:"value"`
	} `json:"view"`
}

// ArticleConfl is a single content item as decoded from the Confl REST API
// JSON response.
type ArticleConfl struct {
	ID      string  `json:"id"`
	Type    string  `json:"type"`
	Title   string  `json:"title"`
	Space   Space   `json:"space"`
	History History `json:"history"`
	Body    Body    `json:"body"`
	Links   struct {
		// WebUI is the web UI path of the item.
		WebUI string `json:"webui"`
	} `json:"_links"`
}

// ArticleConflList is one page of content items together with the paging
// links ("next" / "prev") found under "_links".
type ArticleConflList struct {
	Results []ArticleConfl `json:"results"`
	Links   struct {
		Next string `json:"next"`
		Prev string `json:"prev"`
	} `json:"_links"`
}
type Article struct { ID string `json:"id"` Type string `json:"type"` Title string `json:"title"` SpaceName string `json:"space_name"` CreatedByUserName string `json:"createdBy_username"` CreatedByDisplayName string `json:"createdBy_displayName"` View string `json:"view"` URL string `json:url` Labels []string `json:"labels"` } func NewArticle(a ArticleConfl, endpoint string) Article { art := Article{ ID: a.ID, Type: a.Type, Title: a.Title, SpaceName: a.Space.Name, CreatedByUserName: a.History.CreatedBy.UserName, CreatedByDisplayName: a.History.CreatedBy.DisplayName, View: a.Body.View.Value, URL: endpoint + a.Links.WebUI, Labels: []string{}, } // 特定の単語が含まれるものに対してラベリング for k, terms := range common.LABEL_TERMS { for _, t := range terms { if strings.Contains(art.Title, t) { art.Labels = append(art.Labels, k) break } } } return art }
// LABEL_TERMS associates each label name (key) with the list of terms that
// belong to that label. Note that a term may appear under more than one label
// ("検証" is listed for both "調査" and "試験").
var LABEL_TERMS = map[string][]string{
	// research / survey
	"調査": {"調査", "サーベイ", "レポート", "まとめ", "検証"},
	// testing
	"試験": {"試験", "テスト", "検証"},
	// meetings
	"MTG": {"議事録", "会議", "MTG", "ミーティング", "話し合い"},
	// operations / procedures
	"運用": {"運用方法", "オペレーション", "使用方法", "進め方", "手順"},
	// tips / know-how
	"Tips": {"Tips", "tips", "幸せになれる", "ノウハウ"},
	// standards / rules
	"標準": {"標準", "スタンダード", "規則", "規約", "基準"},
	// specifications / design
	"仕様": {"仕様", "エラーコード", "設計"},
	// home
	"ホーム": {"ホーム"},
}
func FetchArticlesStr(endpoint, expand, username, password string, from, limit int) (string, error) { // parse endpoint u, err := url.Parse(endpoint) if err != nil { return "", errors.Wrap(err, "Invalid endpoint") } u.Path = path.Join(u.Path, CONTENT_API_PATH) // build URL q := u.Query() q.Set("type", common.CONFL_CONTENT_TYPE) q.Set("start", strconv.Itoa(from)) q.Set("limit", strconv.Itoa(limit)) q.Set("expand", expand) u.RawQuery = q.Encode() // build request req, err := http.NewRequest("GET", u.String(), nil) if err != nil { return "", errors.Wrap(err, "Failed to make request") } req.Header.Add("Accept", CONTENT_TYPE_JSON) req.Header.Add("Authorization", toBasicAuthHeader(username, password)) // fetch articles (string) client := new(http.Client) res, err := client.Do(req) defer res.Body.Close() if err != nil { return "", errors.Wrap(err, "failed to fetch data") } else if res.StatusCode != http.StatusOK { return "", errors.New(fmt.Sprintf("HTTP(%d)", res.StatusCode)) } bytes, err := ioutil.ReadAll(res.Body) if err != nil { return "", errors.Wrap(err, "failed to read response data") } return string(bytes), nil }
RFC 7617 - The 'Basic' HTTP Authentication Scheme
// toBasicAuthHeader returns the Authorization header value for the HTTP
// "Basic" scheme: the literal "Basic " followed by the standard base64
// encoding of "username:password".
func toBasicAuthHeader(username, password string) string {
	credentials := fmt.Sprintf("%s:%s", username, password)
	token := base64.StdEncoding.EncodeToString([]byte(credentials))
	return "Basic " + token
}
func fetchArticlesConfl(endpoint, expand, username, password string, from, limit int) (model.ArticleConflList, error) { // fetch article string from confl artiListStr, err := util.FetchArticlesStr(endpoint, expand, username, password, from, limit) if err != nil { return model.ArticleConflList{}, errors.Wrap(err, "Failed to featch Article string") } // convert json ArticleConflList var artiList model.ArticleConflList if err := json.Unmarshal([]byte(artiListStr), &artiList); err != nil { return model.ArticleConflList{}, errors.Wrap(err, "Failed to parse JSON string") } return artiList, nil } func indexingArticle(endpoint, core string, commit bool, articles []model.Article) error { bytes, err := json.Marshal(articles) if err != nil { return errors.Wrap(err, "Failed to convert to json") } return util.IndexingSolr(endpoint, core, commit, string(bytes)) }
func IndexingSolr(endpoint, core string, commit bool, documentsStr string) error { // parse endpoint/path u, err := url.Parse(fmt.Sprintf(PATH_TEMPLATE, endpoint, core)) if err != nil { return errors.Wrap(err, "Invalid URL") } // build URL q := u.Query() q.Set("commit", strconv.FormatBool(commit)) u.RawQuery = q.Encode() // build request req, err := http.NewRequest("GET", u.String(), strings.NewReader(documentsStr)) if err != nil { return errors.Wrap(err, "Failed to make request") } req.Header.Add("Content-type", CONTENT_TYPE_JSON) req.Header.Add("charset", CHAR_SET) log.Println(req.URL.String()) log.Println(req.Header) log.Println(req.Body) // fetch articles (string) client := new(http.Client) res, err := client.Do(req) defer res.Body.Close() if err != nil { return errors.Wrap(err, "failed to fetch data") } else if res.StatusCode != http.StatusOK { return errors.New(fmt.Sprintf("HTTP(%d)", res.StatusCode)) } return nil }
何千、何万件も記事があると手動インデクシングなんてできないですからね... (^_^;)