Xnuvers007 · October 12, 2024 09:57
diff --git a/unpamrepostory.py b/unpamrepostory.py
 import requests
 from bs4 import BeautifulSoup

 # Search the UNPAM repository (advanced search, by title) and print, for every
 # hit: authors, title, record link and the document links found in the row.

 # Cookies replayed with the search request.
 # NOTE(review): these look like values captured from a browser session
 # (e.g. PHPSESSID). They are hard-coded and will presumably expire --
 # confirm whether the search endpoint needs them at all.
 cookies = {
     'wordpress_test_cookie': 'WP+Cookie+check',
     'wp-settings-1': 'edit_element_vcUIPanelWidth%3D1021%26ampampampampedit_element_vcUIPanelLeft%3D14px%26ampampampampedit_element_vcUIPanelTop%3D59px%26ampampampampeditor%3Dtinymce%26ampampampamplibraryContent%3Dbrowse%26ampampampampimgsize%3Dfull%26ampamplibraryContent%3Dbrowse%26ampampmfold%3Do%26ampampedit_element_vcUIPanelLeft%3D67px%26ampampedit_element_vcUIPanelTop%3D136px%26ampampposts_list_mode%3Dlist%26uploader%3D1%26editor%3Dtinymce%26edit_element_vcUIPanelLeft%3D230px%26edit_element_vcUIPanelTop%3D137px%26libraryContent%3Dbrowse%26template_window_vcUIPanelWidth%3D1212%26template_window_vcUIPanelLeft%3D142px%26template_window_vcUIPanelTop%3D74px%26mfold%3Do',
     'wp-settings-time-1': '1719133983',
     'PHPSESSID': 'cc28vdiqhvlek3jv49hib7smd2',
     'sc_is_visitor_unique': 'rx12856607.1728455295.7B514FD2EE304FEE97F9BC36B4D3A245.2.2.2.2.2.2.2.2.1',
     'vCentminmod': '8735cc5bae5801c849211396cc63cd91',
 }

 # Browser-like request headers sent with the search.
 # No raw 'Cookie' header here: cookies are passed via the cookies= argument.
 headers = {
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
     'Accept-Language': 'id,en;q=0.9,en-GB;q=0.8,en-US;q=0.7,sv;q=0.6',
     'Connection': 'keep-alive',
     'Referer': 'https://repository.unpam.ac.id/cgi/search/advanced',
     'Sec-Fetch-Dest': 'document',
     'Sec-Fetch-Mode': 'navigate',
     'Sec-Fetch-Site': 'same-origin',
     'Sec-Fetch-User': '?1',
     'Upgrade-Insecure-Requests': '1',
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
     'sec-ch-ua': '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
     'sec-ch-ua-mobile': '?0',
     'sec-ch-ua-platform': '"Windows"',
 }

 # Query-string fields of the advanced-search form; only 'title' carries a
 # search term, the remaining text fields are sent empty.
 params = {
     'screen': 'Search',
     'dataset': 'archive',
     'documents_merge': 'ALL',
     'documents': '',
     'title_merge': 'ALL',
     'title': 'Rekayasa perangkat lunak',
     'creators_name_merge': 'ALL',
     'creators_name': '',
     'abstract_merge': 'ALL',
     'abstract': '',
     'date': '',
     'keywords_merge': 'ALL',
     'keywords': '',
     'subjects_merge': 'ANY',
     'department_merge': 'ALL',
     'department': '',
     'editors_name_merge': 'ALL',
     'editors_name': '',
     'refereed': 'EITHER',
     'publication_merge': 'ALL',
     'publication': '',
     'satisfyall': 'ALL',
     'order': '-date/creators_name/title',
     '_action_search': 'Search',
 }

 # NOTE(review): verify=False disables TLS certificate verification. It is
 # kept only to preserve the existing behaviour; drop it once the server's
 # certificate chain validates.
 response = requests.get(
     'https://repository.unpam.ac.id/cgi/search/archive/advanced',
     params=params,
     cookies=cookies,
     headers=headers,
     verify=False,
     timeout=30,  # without a timeout the call can block indefinitely
 )
 # Fail loudly on an HTTP error instead of parsing an error page.
 response.raise_for_status()

 soup = BeautifulSoup(response.text, 'html.parser')

 # find() returns None when the results container is absent, so fall back to
 # an empty result list rather than raising AttributeError.
 div = soup.find('div', attrs={'class': 'ep_search_results'})
 results = div.find_all('tr', class_='ep_search_result') if div is not None else []

 if not results:
     print("No results found.")

 # Loop through each result row and print the extracted information.
 for result in results:
     # Authors: every <span class="person_name"> inside the row.
     authors = result.find_all('span', class_='person_name')
     authors_list = [author.get_text(strip=True) for author in authors]

     # Title and record link: taken from the first anchor in the row, which
     # may be missing.
     title_tag = result.find('a')
     title = title_tag.get_text(strip=True) if title_tag is not None else ''
     link = title_tag.get('href', '') if title_tag is not None else ''

     # Document links: anchors inside the centred cell, if that cell exists.
     # href=True skips anchors without an href attribute.
     publication_info = result.find('td', align='center')
     if publication_info is not None:
         document_links = [
             anchor['href'] for anchor in publication_info.find_all('a', href=True)
         ]
     else:
         document_links = []

     print("Authors:", ", ".join(authors_list))
     print("Title:", title)
     print("Link:", link)
     print("Document Links:", ", ".join(document_links))
     print()
	import requests
	from bs4 import BeautifulSoup

	# Search the UNPAM repository (advanced search, by title) and print, for
	# every hit: authors, title, record link and the document links in the row.

	# Cookies replayed with the search request.
	# NOTE(review): these look like values captured from a browser session
	# (e.g. PHPSESSID). They are hard-coded and will presumably expire --
	# confirm whether the search endpoint needs them at all.
	cookies = {
		'wordpress_test_cookie': 'WP+Cookie+check',
		'wp-settings-1': 'edit_element_vcUIPanelWidth%3D1021%26ampampampampedit_element_vcUIPanelLeft%3D14px%26ampampampampedit_element_vcUIPanelTop%3D59px%26ampampampampeditor%3Dtinymce%26ampampampamplibraryContent%3Dbrowse%26ampampampampimgsize%3Dfull%26ampamplibraryContent%3Dbrowse%26ampampmfold%3Do%26ampampedit_element_vcUIPanelLeft%3D67px%26ampampedit_element_vcUIPanelTop%3D136px%26ampampposts_list_mode%3Dlist%26uploader%3D1%26editor%3Dtinymce%26edit_element_vcUIPanelLeft%3D230px%26edit_element_vcUIPanelTop%3D137px%26libraryContent%3Dbrowse%26template_window_vcUIPanelWidth%3D1212%26template_window_vcUIPanelLeft%3D142px%26template_window_vcUIPanelTop%3D74px%26mfold%3Do',
		'wp-settings-time-1': '1719133983',
		'PHPSESSID': 'cc28vdiqhvlek3jv49hib7smd2',
		'sc_is_visitor_unique': 'rx12856607.1728455295.7B514FD2EE304FEE97F9BC36B4D3A245.2.2.2.2.2.2.2.2.1',
		'vCentminmod': '8735cc5bae5801c849211396cc63cd91',
	}

	# Browser-like request headers sent with the search.
	# No raw 'Cookie' header here: cookies are passed via the cookies= argument.
	headers = {
		# '*/*' is the wildcard media range; a bare '/' is not a valid media type.
		'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
		'Accept-Language': 'id,en;q=0.9,en-GB;q=0.8,en-US;q=0.7,sv;q=0.6',
		'Connection': 'keep-alive',
		'Referer': 'https://repository.unpam.ac.id/cgi/search/advanced',
		'Sec-Fetch-Dest': 'document',
		'Sec-Fetch-Mode': 'navigate',
		'Sec-Fetch-Site': 'same-origin',
		'Sec-Fetch-User': '?1',
		'Upgrade-Insecure-Requests': '1',
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
		'sec-ch-ua': '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
		'sec-ch-ua-mobile': '?0',
		'sec-ch-ua-platform': '"Windows"',
	}

	# Query-string fields of the advanced-search form; only 'title' carries a
	# search term, the remaining text fields are sent empty.
	params = {
		'screen': 'Search',
		'dataset': 'archive',
		'documents_merge': 'ALL',
		'documents': '',
		'title_merge': 'ALL',
		'title': 'Rekayasa perangkat lunak',
		'creators_name_merge': 'ALL',
		'creators_name': '',
		'abstract_merge': 'ALL',
		'abstract': '',
		'date': '',
		'keywords_merge': 'ALL',
		'keywords': '',
		'subjects_merge': 'ANY',
		'department_merge': 'ALL',
		'department': '',
		'editors_name_merge': 'ALL',
		'editors_name': '',
		'refereed': 'EITHER',
		'publication_merge': 'ALL',
		'publication': '',
		'satisfyall': 'ALL',
		'order': '-date/creators_name/title',
		'_action_search': 'Search',
	}

	# NOTE(review): verify=False disables TLS certificate verification. It is
	# kept only to preserve the existing behaviour; drop it once the server's
	# certificate chain validates.
	response = requests.get(
		'https://repository.unpam.ac.id/cgi/search/archive/advanced',
		params=params,
		cookies=cookies,
		headers=headers,
		verify=False,
		timeout=30,  # without a timeout the call can block indefinitely
	)
	# Fail loudly on an HTTP error instead of parsing an error page.
	response.raise_for_status()

	soup = BeautifulSoup(response.text, 'html.parser')

	# find() returns None when the results container is absent, so fall back
	# to an empty result list rather than raising AttributeError.
	div = soup.find('div', attrs={'class': 'ep_search_results'})
	results = div.find_all('tr', class_='ep_search_result') if div is not None else []

	if not results:
		print("No results found.")

	# Loop through each result row and print the extracted information.
	for result in results:
		# Authors: every <span class="person_name"> inside the row.
		authors = result.find_all('span', class_='person_name')
		authors_list = [author.get_text(strip=True) for author in authors]

		# Title and record link: taken from the first anchor in the row,
		# which may be missing.
		title_tag = result.find('a')
		title = title_tag.get_text(strip=True) if title_tag is not None else ''
		link = title_tag.get('href', '') if title_tag is not None else ''

		# Document links: anchors inside the centred cell, if that cell
		# exists. href=True skips anchors without an href attribute.
		publication_info = result.find('td', align='center')
		if publication_info is not None:
			document_links = [
				anchor['href'] for anchor in publication_info.find_all('a', href=True)
			]
		else:
			document_links = []

		print("Authors:", ", ".join(authors_list))
		print("Title:", title)
		print("Link:", link)
		print("Document Links:", ", ".join(document_links))
		print()