import os
from datetime import datetime, timedelta
from typing import Any, Dict, Generator, List, Union

import requests

# Optional - to connect using OAuth credentials
from oauthlib.oauth1 import SIGNATURE_RSA
from requests_oauthlib import OAuth1Session


class JiraClient:
    def __init__(
        self,
        username: str = None,
        api_token: str = None,
        access_token: str = None,
        access_token_secret: str = None,
        consumer_key: str = None,
        key_cert: str = None,
    ):
        self._user_url = os.getenv("JIRA_SERVER", "").rstrip("/")
        self._base_url = f"{self._user_url}/rest/api/3"
        if username and api_token:
            self._session = requests.Session()
            self._session.auth = (username, api_token)
        elif access_token and access_token_secret and consumer_key and key_cert:
            self._session = OAuth1Session(
                consumer_key,
                rsa_key=key_cert,
                resource_owner_key=access_token,
                resource_owner_secret=access_token_secret,
                signature_method=SIGNATURE_RSA,
            )
        else:
            raise ValueError("Must use an API token or OAuth credentials")

    def _get_paginated_results(
        self, url: str, results_key: str, parameters: Dict[str, Union[str, int]] = None, use_post: bool = False,
    ) -> Generator[Dict[str, Any], None, None]:
        """Get results of a paginated call that uses the 'maxResults', 'startAt', and 'total' attributes.

        :param url: URL without any pagination parameters
        :param results_key: The key of the response dict that contains the actual elements to return
                            (varies from call to call). Ex.: "items"
        :param parameters: If use_post is False, URL parameters. If use_post is True, JSON-encoded body parameters
        :param use_post: Use POST instead of GET. Needed if the parameters are too long to fit in a URL
        """
        parameters = parameters or {}
        results_per_page = 1000
        parameters["maxResults"] = results_per_page
        start_at = 0
        while True:
            parameters["startAt"] = start_at
            if use_post:
                response = self._session.post(url, json=parameters)
            else:
                response = self._session.get(url, params=parameters)
            response.raise_for_status()
            response_json = response.json()
            results = response_json[results_key]
            if response_json["maxResults"] < results_per_page:
                # Some calls limit the maximum value of maxResults
                results_per_page = response_json["maxResults"]
                parameters["maxResults"] = results_per_page
            for result in results:
                yield result
            start_at += results_per_page
            if start_at >= response_json["total"]:
                return

    def _get_paginated_results_with_next_page_link(self, url: str) -> Generator[Dict[str, Any], None, None]:
        """Get results of a call that returns a payload with the 'lastPage' and 'nextPage' attributes"""
        is_last_page = False
        while not is_last_page:
            response = self._session.get(url)
            response.raise_for_status()
            response_json = response.json()
            for result in response_json["values"]:
                yield result
            is_last_page = response_json.get("lastPage", True)
            if not is_last_page:
                url = response_json["nextPage"]

    def retrieve_worklogs_updated_since(self, start: datetime) -> List[Dict[str, Any]]:
        """Retrieve worklog objects for all worklogs that have been created or updated since the provided datetime.

        Faster than getting worklogs through issues
        """
        worklog_ids: List[str] = []
        for worklog_entry in self._get_paginated_results_with_next_page_link(
            f"{self._base_url}/worklog/updated?since={int(start.timestamp() * 1000)}"
        ):
            worklog_ids.append(worklog_entry["worklogId"])
        worklogs_per_page = 1000
        ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
        worklogs_by_id: Dict[str, Dict[str, Any]] = {}  # Using a dict to remove duplicates returned by the Jira API
        # This is kind of a manual pagination. The documentation only states "The returned list of worklogs is limited to 1000 items."
        # Doc: https://developer.atlassian.com/cloud/jira/platform/rest/v3/#api-rest-api-3-worklog-list-post
        for ids_to_get in ids_in_groups_per_page:
            for worklog in self._session.post(f"{self._base_url}/worklog/list", json={"ids": ids_to_get}).json():
                # Optionally remove the worklogs you don't want (not in the right time period)
                worklogs_by_id[worklog["id"]] = worklog
        return list(worklogs_by_id.values())

    def search_issues(self, jql: str, fields: List[str] = None) -> List[Dict[str, Any]]:
        """Return issues that match a specified JQL query"""
        issues: List[Dict[str, Any]] = []
        parameters: Dict[str, Union[str, List[str]]] = {"jql": jql}
        if fields:
            parameters["fields"] = fields
        for issue in self._get_paginated_results(f"{self._base_url}/search", parameters=parameters, results_key="issues", use_post=True):
            issues.append(issue)
        return issues

    # Point 3 - get issues for the retrieved worklogs
    def retrieve_issues_for_worklogs(self, worklogs: List[Dict[str, Any]], fields: List[str] = None) -> List[Dict[str, Any]]:
        """Get Issue objects referenced in a list of worklogs"""
        return self.search_issues(f"id in ({','.join(str(issue_id) for issue_id in set(worklog['issueId'] for worklog in worklogs))})", fields=fields)


# Example usage
os.environ["JIRA_SERVER"] = "https://mycompany.atlassian.net/"
client = JiraClient("[email protected]", "my_api_token_12345")
recent_worklogs = client.retrieve_worklogs_updated_since(datetime.now() - timedelta(days=14))
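A possible next step, sketched here as an illustration (the field names "summary" and "project" are standard Jira fields; adjust to your needs):

# Sketch: fetch the issues referenced by the retrieved worklogs, requesting
# only a couple of fields to keep the responses small.
issues = client.retrieve_issues_for_worklogs(recent_worklogs, fields=["summary", "project"])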
Line 100:
ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page - 1] for i in range(0, len(worklog_ids), worklogs_per_page)]
Are you sure about this?
Let's say we have 32 worklog IDs (here identified by the numbers 0 to 31):
>>> worklog_ids
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
If we take 10 per page, we get three lists of 10 and one list of 2:
>>> [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [30, 31]]
With your correction, we miss numbers 9, 19, and 29:
[[0, 1, 2, 3, 4, 5, 6, 7, 8], [10, 11, 12, 13, 14, 15, 16, 17, 18], [20, 21, 22, 23, 24, 25, 26, 27, 28], [30, 31]]
Of course you're right. That's not a good correction.
However, there still must be something wrong with the code, somewhere above the place that I pointed out. The list worklog_ids
returned by the generator has some specific items duplicated, like this:
>>> worklog_ids
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31]
I think what can happen is that new entries are added in between paginated calls, so the list shifts. Say we request the 100 most recent entries; then a user creates a new worklog entry; when we then request entries 101 to 200, entry 101 is the previous entry 100.
This can be solved by removing duplicates:
Line 109
return list(set(worklogs))
Makes sense?
It is reproducible, and it is actually caused by the Jira API: it builds the nextPage URL using the timestamp of the last item on the current page. As a consequence, the next page always starts with that last item from the previous page (plus any other items with the same timestamp). I agree that removing the duplicates as you suggested makes sense.
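A minimal sketch of that overlap (with made-up ids; "timeSpentSeconds" is a standard Jira worklog field), showing why keying by id removes the duplicates:

# Hypothetical pages: the second page repeats the boundary item from the first,
# because nextPage is built from the last item's timestamp.
pages = [
    [{"id": "100", "timeSpentSeconds": 3600}, {"id": "101", "timeSpentSeconds": 900}],
    [{"id": "101", "timeSpentSeconds": 900}, {"id": "102", "timeSpentSeconds": 1800}],
]
worklogs_by_id = {}
for page in pages:
    for worklog in page:
        worklogs_by_id[worklog["id"]] = worklog  # later duplicates overwrite; each id is kept once
print(len(worklogs_by_id))  # 3 unique worklogs, not 4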
Interesting, I modified the gist. Thanks and have a nice day!
Thanks for the quick snippet. I tried using it as is (with my personal details) and it throws an error:
An error was encountered: unhashable type: 'dict'
Traceback (most recent call last):
  File "<stdin>", line 106, in retrieve_worklogs_updated_since
TypeError: unhashable type: 'dict'
Any leads on how I can debug it?
@singh-ab
you can try this version:
def retrieve_worklogs_updated_since(self, start: datetime) -> List[Dict[str, Any]]:
    """Retrieve worklog objects for all worklogs that have been created or updated since the provided datetime.

    Faster than getting worklogs through issues
    """
    worklog_ids: List[str] = []
    for worklog_entry in self._get_paginated_results_with_next_page_link(
        f"{self._base_url}/worklog/updated?since={int(start.timestamp() * 1000)}"
    ):
        worklog_ids.append(worklog_entry["worklogId"])
    worklogs_per_page = 1000
    ids_in_groups_per_page = [worklog_ids[i : i + worklogs_per_page] for i in range(0, len(worklog_ids), worklogs_per_page)]
    worklogs: List[Dict[str, Any]] = []
    # This is kind of a manual pagination. The documentation only states "The returned list of worklogs is limited to 1000 items."
    # Doc: https://developer.atlassian.com/cloud/jira/platform/rest/v3/#api-rest-api-3-worklog-list-post
    for ids_to_get in ids_in_groups_per_page:
        for worklog in self._session.post(f"{self._base_url}/worklog/list", json={"ids": ids_to_get}).json():
            # Optionally remove the worklogs you don't want (not in the right time period)
            worklogs.append(worklog)
    # Remove duplicates returned by the Jira API (dicts are unhashable, so set() can't be used here)
    worklogs_clean: List[Dict[str, Any]] = []
    for worklog in worklogs:
        if worklog not in worklogs_clean:
            worklogs_clean.append(worklog)
    return worklogs_clean
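A possible linear-time alternative, assuming each worklog dict carries an "id" key (Jira worklogs do), instead of the quadratic list-membership check:

# Sketch: dedup by worklog id; the dict keeps one entry per id
worklogs_clean = list({worklog["id"]: worklog for worklog in worklogs}.values())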
@singh-ab The issue should be fixed now
Thanks for the help @joshmuente !
@dblanchette Thanks a lot for this gist!
Is there any reason why you are not using
https://pypi.org/project/jira/
?
@aquac I don't know if this has been fixed since, but it did not support pagination at the time this gist was created.
@dblanchette Running this script returns blank. I'm just updating the script with my URL, login email address and API token.
//
os.environ["JIRA_SERVER"] = "https://mycompany.atlassian.net/"
client = JiraClient("[email protected]", "my_api_token_12345")
//
Am I missing something?
@svgincan The script does not return anything; it is meant to be included in another project.
Depending on your needs, you may want to print recent_worklogs or process it further, for example.
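For instance, a minimal sketch (assuming the standard Jira worklog fields "id", "issueId", and "timeSpent"):

# Print one line per retrieved worklog
for worklog in recent_worklogs:
    print(worklog["id"], worklog["issueId"], worklog.get("timeSpent"))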