Created
November 4, 2022 17:43
-
-
Save camdencheek/43f5716cdf63c1e66b49a940f04a4356 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"context" | |
"encoding/json" | |
"errors" | |
"fmt" | |
"net/http" | |
"os" | |
"strconv" | |
"strings" | |
"time" | |
"github.com/graph-gophers/graphql-go/relay" | |
) | |
func main() { | |
githubToken, ok := os.LookupEnv("GH_TOKEN") | |
if !ok { | |
panic("GH_TOKEN not set") | |
} | |
githubURL, ok := os.LookupEnv("GH_API_URL") | |
if !ok { | |
githubURL = "https://api.github.com/graphql" | |
} | |
sourcegraphToken, ok := os.LookupEnv("SG_TOKEN") | |
if !ok { | |
panic("SG_TOKEN not set") | |
} | |
sourcegraphURL, ok := os.LookupEnv("SG_URL") | |
if !ok { | |
panic("SG_URL not set") | |
} | |
ghc := &ghClient{ | |
token: githubToken, | |
url: githubURL, | |
} | |
sgc := &sgClient{ | |
token: sourcegraphToken, | |
url: sourcegraphURL, | |
} | |
ctx := context.Background() | |
f, err := os.Open("/tmp/reponames_remaining.txt") | |
if err != nil { | |
panic(err) | |
} | |
lineReader := bufio.NewScanner(f) | |
nextRepo := func() (int, string, bool) { | |
if !lineReader.Scan() { | |
return 0, "", false | |
} | |
fields := strings.Fields(lineReader.Text()) | |
id, err := strconv.Atoi(fields[1]) | |
if err != nil { | |
panic(err) | |
} | |
name := fields[3] | |
return id, name, true | |
} | |
total := 0 | |
OUTER: | |
for { | |
start := time.Now() | |
var ids []int | |
var names []string | |
INNER: | |
for i := 0; i < 1000; i++ { | |
id, name, ok := nextRepo() | |
if !ok { | |
if len(ids) == 0 { | |
break OUTER | |
} | |
break INNER | |
} | |
ids = append(ids, id) | |
names = append(names, name) | |
total += 1 | |
} | |
repoTopics, err := ghc.fetchTopics(ctx, ids, names) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Printf("Fetching repo topics took %s\n", time.Since(start)) | |
start = time.Now() | |
err = sgc.addRepoTopics(ctx, repoTopics) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Printf("Adding tags took %s\n", time.Since(start)) | |
fmt.Printf("%d/6271964\n", total) | |
} | |
} | |
// ghClient is a minimal client for a GitHub GraphQL endpoint.
type ghClient struct {
	token string // sent as "bearer <token>" in the Authorization header
	url   string // URL that GraphQL queries are POSTed to
}

// fetchTopics looks up the topics of a batch of repositories with a single
// GraphQL query and returns them keyed by repository ID.
//
// ids and names are parallel slices: names[i] is the name of the repository
// whose ID is ids[i]. A name is only queried when it splits into exactly three
// "/"-separated parts; the second part is used as the owner and the third as
// the repository name. Other names are skipped. Repositories for which the
// response carries no topics have no entry in the returned map.
//
// The query also requests the remaining rate limit. When fewer than 10 points
// remain, fetchTopics blocks until the reported reset time (or until ctx is
// done) before returning.
//
// An error is returned when the request fails, the server answers with a
// non-200 status, the response cannot be decoded, the response contains only
// errors, or ctx is cancelled while waiting for the rate limit to reset.
func (cli *ghClient) fetchTopics(ctx context.Context, ids []int, names []string) (map[int][]string, error) {
	const queryTemplate = `
query FetchTopics {
%s
	rateLimit {
		remaining
		resetAt
	}
}
`
	const repositoryFragment = `
	r%d: repository(owner:"%s", name:"%s") {
		repositoryTopics(first:100) {
			nodes {
				topic {
					name
				}
			}
		}
	}
`
	// Each repository gets the alias r<i>, where i is its index in
	// names/ids, so that response entries can be mapped back to an ID.
	var fragments strings.Builder
	for i, name := range names {
		parts := strings.Split(name, "/")
		if len(parts) != 3 {
			continue
		}
		fmt.Fprintf(&fragments, repositoryFragment, i, parts[1], parts[2])
	}
	query := fmt.Sprintf(queryTemplate, fragments.String())

	payload, err := json.Marshal(map[string]interface{}{"query": query})
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cli.url, bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	req.Header.Add("Authorization", "bearer "+cli.token)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// A failed request (bad credentials, abuse limits, gateway errors) may
	// still carry a JSON body; without this check it would decode into an
	// empty result and the whole batch would silently lose its topics.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching topics: unexpected status %s", resp.Status)
	}

	var decoded struct {
		Data   map[string]json.RawMessage
		Errors []struct {
			Message string
		}
	}
	if err := json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
		return nil, fmt.Errorf("fetching topics: decoding response: %w", err)
	}
	// Errors alongside data are tolerated (the data that did arrive is still
	// used); errors with no data at all mean the query failed.
	if decoded.Data == nil && len(decoded.Errors) > 0 {
		return nil, fmt.Errorf("fetching topics: %s", decoded.Errors[0].Message)
	}

	topics := make(map[int][]string)
	for key, raw := range decoded.Data {
		if key == "rateLimit" {
			continue
		}
		if !strings.HasPrefix(key, "r") {
			return nil, fmt.Errorf("fetching topics: unexpected response key %q", key)
		}
		i, err := strconv.Atoi(key[1:])
		if err != nil || i < 0 || i >= len(ids) {
			return nil, fmt.Errorf("fetching topics: unexpected response key %q", key)
		}
		var repository struct {
			RepositoryTopics struct {
				Nodes []struct {
					Topic struct {
						Name string
					}
				}
			}
		}
		if err := json.Unmarshal(raw, &repository); err != nil {
			return nil, fmt.Errorf("fetching topics: decoding %s: %w", key, err)
		}
		for _, node := range repository.RepositoryTopics.Nodes {
			topics[ids[i]] = append(topics[ids[i]], node.Topic.Name)
		}
	}

	if raw, ok := decoded.Data["rateLimit"]; ok {
		if err := waitForRateLimit(ctx, raw); err != nil {
			return nil, err
		}
	}
	return topics, nil
}

// waitForRateLimit decodes the rateLimit object of a FetchTopics response and,
// when fewer than 10 points remain, blocks until the reported reset time has
// passed or ctx is done.
func waitForRateLimit(ctx context.Context, raw json.RawMessage) error {
	var rateLimit struct {
		Remaining int
		ResetAt   string
	}
	if err := json.Unmarshal(raw, &rateLimit); err != nil {
		return fmt.Errorf("decoding rate limit: %w", err)
	}
	println("Remaining rate limit capacity: ", rateLimit.Remaining)
	if rateLimit.Remaining >= 10 {
		return nil
	}

	resetAt, err := time.Parse(time.RFC3339, rateLimit.ResetAt)
	if err != nil {
		return fmt.Errorf("parsing rate limit reset time %q: %w", rateLimit.ResetAt, err)
	}
	// The wait is the time remaining UNTIL the reset. Subtracting the other
	// way round yields a negative duration for a future reset, which makes
	// the sleep a no-op.
	wait := time.Until(resetAt)
	if wait <= 0 {
		return nil
	}
	fmt.Printf("Hit rate limit. Sleeping for %s\n", wait)

	timer := time.NewTimer(wait)
	defer timer.Stop()
	select {
	case <-timer.C:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
type sgClient struct { | |
token string | |
url string | |
} | |
func (cli *sgClient) addRepoTopics(ctx context.Context, repoTopics map[int][]string) error { | |
var b strings.Builder | |
b.WriteString("mutation AddRepoTopics {") | |
for repoID, topics := range repoTopics { | |
graphqlID := string(relay.MarshalID("Repository", repoID)) | |
for i, topic := range topics { | |
fmt.Fprintf(&b, ` | |
id%d_%d: addRepoKeyValuePair(repo:"%s", key:"%s") { | |
__typename | |
} | |
`, repoID, i, graphqlID, topic) | |
} | |
} | |
b.WriteString("}") | |
query := b.String() | |
payload := map[string]interface{}{ | |
"query": query, | |
} | |
payloadBytes, err := json.Marshal(payload) | |
if err != nil { | |
return err | |
} | |
req, err := http.NewRequestWithContext(ctx, http.MethodPost, cli.url+"/.api/graphql", bytes.NewReader(payloadBytes)) | |
if err != nil { | |
return err | |
} | |
req.Header.Add("Authorization", "token "+cli.token) | |
resp, err := http.DefaultClient.Do(req) | |
if err != nil { | |
return err | |
} | |
defer resp.Body.Close() | |
if resp.StatusCode != 200 { | |
return errors.New(resp.Status) | |
} | |
return nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@camdencheek Thanks for this