Retrieve the list of files from a SharePoint folder share link and generate direct download links. (Requires: xmltodict)
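# Usage: set `url` below to a SharePoint folder share link. Opening the link
# redirects to a library view whose URL carries an `id` query parameter with
# the server-relative path of the shared folder; this script extracts that
# path and queries the SharePoint REST API for the folder's files. (The exact
# share-link shape varies by tenant; the redirect/`id` behavior described
# here is what the code below relies on.)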
import json
import re
from urllib import parse

import requests
import xmltodict
# https://sharepoint.stackexchange.com/questions/238013/get-list-of-files-from-sharepoint-folder-by-url
# https://stackoverflow.com/questions/25091976/python-requests-get-cookies
# https://github.com/juju/python-libjuju/blob/master/juju/client/gocookies.py
# https://stackoverflow.com/questions/13030095/how-to-save-requests-python-cookies-to-a-file
# https://stackoverflow.com/questions/71576413/saving-data-to-a-json-file-using-python
# https://stackoverflow.com/questions/8628152/url-decode-with-python-3
# https://stackoverflow.com/questions/6925825/get-subdomain-from-url-using-python
# https://stackoverflow.com/questions/5074803/retrieving-parameters-from-a-url
# https://stackoverflow.com/questions/1883980/find-the-nth-occurrence-of-substring-in-a-string
def find_nth(s, p, n):
    """Return the index of the n-th occurrence of pattern p in string s."""
    return [m.start() for m in re.finditer(p, s)][n - 1]
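# For example (illustrative values): find_nth('/sites/Team/Docs', '/', 3) == 11,
# the offset of the third slash, which is how the site path is sliced out below.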
# Fetch file information for the shared folder
url = 'PUT_SHARE_LINK_HERE'
session = requests.Session()
r = session.get(url)  # follow the share link's redirects
final_url = parse.unquote(r.url)
# The `id` query parameter holds the server-relative folder path,
# e.g. '/sites/<site>/Shared Documents/<folder>'
folder_id = parse.parse_qs(parse.urlparse(final_url).query)['id'][0]
name = parse.urlparse(url).hostname.split('.')[0]  # tenant name (subdomain)
connector = folder_id[1:find_nth(folder_id, r'/', 3)]  # site path, e.g. 'sites/<site>'
q = ("https://" + name + ".sharepoint.com/" + connector
     + "/_api/web/getfolderbyserverrelativeurl('/" + folder_id + "/')/Files")
res = session.get(q)
listing = xmltodict.parse(res.content)  # REST response is an Atom XML feed
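# The parsed feed (as the access pattern below assumes) looks roughly like:
#     listing['feed']['entry'] -> one entry per file, each with
#     entry['content']['m:properties']['d:UniqueId']['#text'] -> the file's GUID
# Other property fields (e.g. d:Name, d:Length) are typical of this endpoint
# but are not used here.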
# Export cookies as JSON (for use with download managers / browser extensions)
cookies = []
for c in session.cookies:
    cookies.append({
        "name": c.name,
        "value": c.value,
        "domain": c.domain,
        "path": c.path,
        "expires": c.expires if c.expires else 0,
        "secure": c.secure,
        "hostOnly": c.domain_specified,
        "httpOnly": c.get_nonstandard_attr('httpOnly') if c.has_nonstandard_attr('httpOnly') else False
    })
with open('cookies.txt', 'w') as f:
    json.dump(cookies, f)
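# A minimal sketch (assuming cookies.txt as written above) of reloading the
# exported cookies into a fresh requests session:
#
#     new_session = requests.Session()
#     with open('cookies.txt') as f:
#         for c in json.load(f):
#             new_session.cookies.set(c['name'], c['value'],
#                                     domain=c['domain'], path=c['path'])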
# Generate a direct download link for every file in the folder
entries = listing['feed']['entry']
if isinstance(entries, dict):  # xmltodict yields a dict, not a list, for a single file
    entries = [entries]
links = []
for entry in entries:
    uid = entry['content']['m:properties']['d:UniqueId']['#text']
    links.append('https://' + name + '.sharepoint.com/' + connector + '/_layouts/15/download.aspx?UniqueId=' + uid)
parent_name = folder_id.rstrip('/').rsplit('/', 1)[-1]  # shared folder's name, used as the output file prefix
with open(parent_name + "links.txt", "a") as f:
    f.write('\n'.join(links))
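# A minimal sketch of using the links with the same authenticated session to
# download the first file (the output filename 'downloaded_file' is illustrative):
#
#     if links:
#         resp = session.get(links[0])
#         with open('downloaded_file', 'wb') as out:
#             out.write(resp.content)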