Skip to content

Instantly share code, notes, and snippets.

@camdencheek
Created November 4, 2022 17:43
Show Gist options
  • Save camdencheek/43f5716cdf63c1e66b49a940f04a4356 to your computer and use it in GitHub Desktop.
Save camdencheek/43f5716cdf63c1e66b49a940f04a4356 to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
"strconv"
"strings"
"time"
"github.com/graph-gophers/graphql-go/relay"
)
// main copies GitHub repository topics onto a Sourcegraph instance.
//
// It reads repositories from /tmp/reponames_remaining.txt, one per line. Each
// line is split on whitespace; the second field must be an integer repository
// ID and the fourth field is the repository name. Repositories are processed
// in batches of up to 1000: topics are fetched with ghClient.fetchTopics and
// then written with sgClient.addRepoTopics.
//
// Configuration comes from the environment. GH_TOKEN, SG_TOKEN and SG_URL are
// required; GH_API_URL is optional and defaults to
// https://api.github.com/graphql. Any failure panics.
func main() {
	githubToken, ok := os.LookupEnv("GH_TOKEN")
	if !ok {
		panic("GH_TOKEN not set")
	}
	githubURL, ok := os.LookupEnv("GH_API_URL")
	if !ok {
		githubURL = "https://api.github.com/graphql"
	}
	sourcegraphToken, ok := os.LookupEnv("SG_TOKEN")
	if !ok {
		panic("SG_TOKEN not set")
	}
	sourcegraphURL, ok := os.LookupEnv("SG_URL")
	if !ok {
		panic("SG_URL not set")
	}

	ghc := &ghClient{
		token: githubToken,
		url:   githubURL,
	}
	sgc := &sgClient{
		token: sourcegraphToken,
		url:   sourcegraphURL,
	}
	ctx := context.Background()

	f, err := os.Open("/tmp/reponames_remaining.txt")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	lineReader := bufio.NewScanner(f)
	lineNo := 0
	// nextRepo returns the next repository from the input file. The final
	// result is false once the input is exhausted.
	nextRepo := func() (int, string, bool) {
		for lineReader.Scan() {
			lineNo++
			fields := strings.Fields(lineReader.Text())
			if len(fields) == 0 {
				// Blank lines carry no repository; skip them.
				continue
			}
			if len(fields) < 4 {
				panic(fmt.Sprintf("line %d: expected at least 4 fields, got %d", lineNo, len(fields)))
			}
			id, err := strconv.Atoi(fields[1])
			if err != nil {
				panic(fmt.Sprintf("line %d: invalid repository ID: %v", lineNo, err))
			}
			return id, fields[3], true
		}
		// Scan also returns false on a read error; do not mistake that for
		// a clean end of input.
		if err := lineReader.Err(); err != nil {
			panic(err)
		}
		return 0, "", false
	}

	const batchSize = 1000
	total := 0
	for {
		start := time.Now()

		ids := make([]int, 0, batchSize)
		names := make([]string, 0, batchSize)
		for len(ids) < batchSize {
			id, name, ok := nextRepo()
			if !ok {
				break
			}
			ids = append(ids, id)
			names = append(names, name)
			total++
		}
		if len(ids) == 0 {
			// Input exhausted and nothing left to process.
			break
		}

		repoTopics, err := ghc.fetchTopics(ctx, ids, names)
		if err != nil {
			panic(err)
		}
		fmt.Printf("Fetching repo topics took %s\n", time.Since(start))

		start = time.Now()
		err = sgc.addRepoTopics(ctx, repoTopics)
		if err != nil {
			panic(err)
		}
		fmt.Printf("Adding tags took %s\n", time.Since(start))
		fmt.Printf("%d/6271964\n", total)
	}
}
// ghClient is a minimal client for a GitHub GraphQL API endpoint.
type ghClient struct {
	// token is sent as the bearer token in the Authorization header.
	token string
	// url is the GraphQL endpoint that queries are POSTed to.
	url string
}

// fetchTopics fetches the topics of a batch of repositories with a single
// aliased GraphQL query.
//
// names[i] is split on "/" and must have exactly three parts; the second and
// third parts are used as the repository owner and name. Names with any other
// shape are skipped. At most the first 100 topics of each repository are
// requested.
//
// The result maps ids[i] to the topic names returned for names[i].
// Repositories for which no topics come back have no entry in the map.
//
// The query also asks for the rate limit status. When fewer than 10 requests
// remain, fetchTopics blocks until the reported reset time or until ctx is
// done, in which case the context's error is returned.
//
// An error is returned if the request fails, the server answers with a status
// other than 200, or the response cannot be decoded.
func (cli *ghClient) fetchTopics(ctx context.Context, ids []int, names []string) (map[int][]string, error) {
	const queryTemplate = `
query FetchTopics {
	%s
	rateLimit {
		remaining
		resetAt
	}
}
`
	const repositoryFragment = `
r%d: repository(owner:"%s", name:"%s") {
	repositoryTopics(first:100) {
		nodes {
			topic {
				name
			}
		}
	}
}
`
	// Remaining-request count below which we wait for the window to reset.
	const rateLimitFloor = 10

	// Each repository is aliased as r<index> so the response can be mapped
	// back to ids by position.
	var fragments strings.Builder
	for i, name := range names {
		parts := strings.Split(name, "/")
		if len(parts) != 3 {
			continue
		}
		fmt.Fprintf(&fragments, repositoryFragment, i, parts[1], parts[2])
	}
	query := fmt.Sprintf(queryTemplate, fragments.String())

	payload, err := json.Marshal(map[string]interface{}{"query": query})
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cli.url, bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	req.Header.Add("Authorization", "bearer "+cli.token)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		return nil, fmt.Errorf("fetching topics: unexpected HTTP status %s", resp.Status)
	}
	var decodedResponse struct {
		Data map[string]json.RawMessage `json:"data"`
	}
	err = json.NewDecoder(resp.Body).Decode(&decodedResponse)
	// Close before any rate-limit wait so the response is not held open
	// while sleeping.
	resp.Body.Close()
	if err != nil {
		return nil, fmt.Errorf("decoding topics response: %w", err)
	}

	topics := make(map[int][]string)
	for key, value := range decodedResponse.Data {
		if key == "rateLimit" {
			continue
		}
		i, err := strconv.Atoi(strings.TrimPrefix(key, "r"))
		if err != nil || i < 0 || i >= len(ids) {
			return nil, fmt.Errorf("unexpected key %q in topics response", key)
		}
		var repository struct {
			RepositoryTopics struct {
				Nodes []struct {
					Topic struct {
						Name string `json:"name"`
					} `json:"topic"`
				} `json:"nodes"`
			} `json:"repositoryTopics"`
		}
		// A null repository decodes to the zero value and contributes no
		// topics.
		if err := json.Unmarshal(value, &repository); err != nil {
			return nil, err
		}
		for _, node := range repository.RepositoryTopics.Nodes {
			topics[ids[i]] = append(topics[ids[i]], node.Topic.Name)
		}
	}

	// A missing or null rateLimit object means the server reported no rate
	// limit status, so there is nothing to wait for.
	if raw, ok := decodedResponse.Data["rateLimit"]; ok && string(raw) != "null" {
		var rateLimit struct {
			Remaining int    `json:"remaining"`
			ResetAt   string `json:"resetAt"`
		}
		if err := json.Unmarshal(raw, &rateLimit); err != nil {
			return nil, err
		}
		println("Remaining rate limit capacity: ", rateLimit.Remaining)
		if rateLimit.Remaining < rateLimitFloor {
			resetAt, err := time.Parse(time.RFC3339, rateLimit.ResetAt)
			if err != nil {
				return nil, fmt.Errorf("parsing rate limit reset time %q: %w", rateLimit.ResetAt, err)
			}
			// time.Until is positive while the reset lies in the future;
			// a reset time already in the past needs no wait at all.
			if sleepDuration := time.Until(resetAt); sleepDuration > 0 {
				fmt.Printf("Hit rate limit. Sleeping for %s\n", sleepDuration)
				if err := sleepContext(ctx, sleepDuration); err != nil {
					return nil, fmt.Errorf("waiting for rate limit reset: %w", err)
				}
			}
		}
	}
	return topics, nil
}

// sleepContext blocks for d or until ctx is done, whichever comes first. It
// returns ctx.Err() when the context ends the wait and nil otherwise.
func sleepContext(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}
// sgClient is a minimal client for a Sourcegraph instance's GraphQL API.
type sgClient struct {
	// token is sent in the Authorization header as "token <token>".
	token string
	// url is the instance base URL; "/.api/graphql" is appended to it.
	url string
}

// addRepoTopics records every topic in repoTopics with a single GraphQL
// mutation request.
//
// repoTopics maps a repository ID to its topic names. Each ID is encoded as a
// relay "Repository" ID, and each topic becomes one aliased
// addRepoKeyValuePair call with the topic as the key.
//
// When repoTopics contains no topics at all, no request is sent and nil is
// returned. Otherwise an error is returned if the request fails or the server
// answers with a status other than 200. The response body is not inspected.
func (cli *sgClient) addRepoTopics(ctx context.Context, repoTopics map[int][]string) error {
	var b strings.Builder
	b.WriteString("mutation AddRepoTopics {")
	pairs := 0
	for repoID, topics := range repoTopics {
		graphqlID := string(relay.MarshalID("Repository", repoID))
		for i, topic := range topics {
			// The alias combines repo ID and topic index so every field in
			// the mutation has a distinct name.
			fmt.Fprintf(&b, `
id%d_%d: addRepoKeyValuePair(repo:"%s", key:"%s") {
__typename
}
`, repoID, i, graphqlID, topic)
			pairs++
		}
	}
	if pairs == 0 {
		// A mutation with an empty selection set is not valid GraphQL, so
		// there is nothing worth sending.
		return nil
	}
	b.WriteString("}")

	payloadBytes, err := json.Marshal(map[string]interface{}{
		"query": b.String(),
	})
	if err != nil {
		return err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cli.url+"/.api/graphql", bytes.NewReader(payloadBytes))
	if err != nil {
		return err
	}
	req.Header.Add("Authorization", "token "+cli.token)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return errors.New(resp.Status)
	}
	return nil
}
@albertocavalcante
Copy link

@camdencheek Thanks for this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment