ksv-muralidhar · August 17, 2021 12:20
diff --git a/activity_main.xml b/activity_main.xml
 <?xml version="1.0" encoding="utf-8"?>

 <!-- Main screen: a full-width Refresh button pinned to the bottom edge, with
      the WebView filling the space above it. -->
 <RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Bounded by the button below so the button never covers page content. -->
    <WebView
        android:id="@+id/webview"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:layout_above="@+id/refresh"
        />

    <!-- match_parent replaces the deprecated fill_parent (identical meaning). -->
    <Button
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_alignParentBottom="true"
        android:layout_alignParentLeft="true"
        android:layout_alignParentStart="true"
        android:text="Refresh"
        android:id="@+id/refresh" />

 </RelativeLayout>

diff --git a/AndroidManifest.xml b/AndroidManifest.xml
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Manifest for the com.example.mynews app: one launcher activity plus network access. -->
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.example.mynews">

    <!-- Network access; MainActivity loads a remote URL into its WebView. -->
    <uses-permission android:name="android.permission.INTERNET" />

    <application
        android:allowBackup="true"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:hardwareAccelerated="true"
        android:theme="@style/Theme.MyNews">
        <!-- Entry-point activity. configChanges declares that orientation and
             screen-size changes are handled by the activity itself. -->
        <activity
            android:name=".MainActivity"
            android:configChanges="orientation|screenSize"
            android:exported="true">
            <!-- MAIN + LAUNCHER: this activity is the app's launcher entry. -->
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

 </manifest>
diff --git a/date_time_parser.py b/date_time_parser.py
 def date_time_parser(dt):
    '''
    Returns the time elapsed (in minutes) since the news was published

    dt: datetime.datetime or pandas.Timestamp
        published date (timezone-aware or naive)

    Returns
    int: time elapsed (in minutes), rounded to the nearest minute
    '''
    # `tzinfo` is available on both datetime and Timestamp objects (`tz` is
    # Timestamp-only). Taking "now" in the same zone as `dt` keeps the
    # subtraction valid for aware and naive values alike.
    now = dt.now(dt.tzinfo)
    return int(round((now - dt).total_seconds() / 60))
diff --git a/elapsed_time_str.py b/elapsed_time_str.py
 def elapsed_time_str(mins):
    '''
    Returns the word form of the time elapsed (in minutes) since the news was published

    mins: int
        time elapsed (in minutes); zero or negative values (items dated in the
        future, e.g. because of a wrong time zone in the feed) are reported as 'Just in'

    Returns
    str: word form of time elapsed, e.g. 'Just in', '5 minutes ago',
         'an hour and 5 mins ago', '4 hours ago', 'a day ago', '1.2 days ago'
    '''
    if mins <= 0:
        return 'Just in'

    hours = int(mins / 60) # integer part of hours. Example: if time elapsed is 2.5 hours, then hours = 2
    days = np.round(mins / (60 * 24), 1) # days elapsed, to one decimal place
    # minutes portion of time elapsed in hours. Example: if time elapsed is 2.5 hours, then remaining_mins = 30
    remaining_mins = int(mins - (hours * 60))

    # The day count is rounded before the comparison, so anything that rounds up to 1.0 already reads as a day
    if days >= 1:
        return 'a day ago' if days == 1 else f'{str(days)} days ago'

    if mins >= 60:
        hour_part = 'an hour' if hours == 1 else f'{str(hours)} hours'
        if remaining_mins == 0:
            return f'{hour_part} ago' # Example: an hour ago / 4 hours ago
        min_part = 'a min' if remaining_mins == 1 else f'{str(remaining_mins)} mins'
        return f'{hour_part} and {min_part} ago' # Example: 2 hours and 15 mins ago

    return 'a minute ago' if mins == 1 else f'{str(mins)} minutes ago'
diff --git a/final_process.py b/final_process.py
 # Collect the news items from all the RSS Feed URLs into one data frame.
 # pd.concat is used because DataFrame.append was removed in pandas 2.0
 # (and copied the whole frame on every iteration).
 final_df = pd.concat([news_agg(feed_url) for feed_url in rss])

 final_df.sort_values(by='elapsed_time', inplace=True) # Sorting the news items by the time elapsed (in minutes) since the news was published
 final_df['src_time'] = final_df['src'] + ('&nbsp;' * 5) + final_df['elapsed_time_str'] # concatenating the source and the string format of the elapsed time
 final_df.drop(columns=['date', 'parsed_date', 'src', 'elapsed_time', 'elapsed_time_str'], inplace=True) # only title, url, description and src_time are kept
 final_df.drop_duplicates(subset='description', inplace=True) # Dropping news items with duplicate descriptions (first, i.e. most recent, occurrence is kept)
 final_df = final_df.loc[(final_df['title'] != ''), :].copy() # Dropping news items with no title
diff --git a/frontend.py b/frontend.py
 #################################################
 ############# FRONT END HTML SCRIPT ##############
 #################################################
 # The page is one HTML table: a spacer row first, then for every news item a
 # linked title, a linked description, a linked "source + elapsed time" line
 # and another spacer row. Fragments are gathered in a list and joined once.
 # NOTE(review): title/description/url come from external feeds and are
 # inserted without escaping while unsafe_allow_html=True - confirm this is acceptable.
 html_parts = ['<html><table style="border: none;"><tr style="border: none;"><td style="border: none; height: 10px;"></td></tr>']
 for _, row in final_df.iterrows(): # one iteration per news item
    href = row["url"]
    description = row["description"]
    url_txt = row["title"]
    src_time = row["src_time"]

    html_parts.append(
        f'<a href="{href}" target="_blank" style="background-color: whitesmoke; display: block; height:100%; text-decoration: none; color: black; line-height: 1.2;">'
        f'<tr style="align:justify; border-left: 5px solid transparent; border-top: 5px solid transparent; border-bottom: 5px solid transparent; font-weight: bold; font-size: 18px; background-color: whitesmoke;">{url_txt}</tr></a>'
        f'<a href="{href}" target="_blank" style="background-color: whitesmoke; display: block; height:100%; text-decoration: none; color: dimgray; line-height: 1.25;">'
        f'<tr style="align:justify; border-left: 5px solid transparent; border-top: 0px; border-bottom: 5px solid transparent; font-size: 14px; padding-bottom:5px;">{description}</tr></a>'
        f'<a href="{href}" target="_blank" style="background-color: whitesmoke; display: block; height:100%; text-decoration: none; color: black;">'
        f'<tr style="border-left: 5px solid transparent; border-top: 0px; border-bottom: 5px solid transparent; color: green; font-size: 11px;">{src_time}</tr></a>'
        '<tr style="border: none;"><td style="border: none; height: 10px;"></td></tr>'
    )

 html_parts.append('</table></html>')
 result_str = ''.join(html_parts)

 # CSS that hides the Streamlit menu and footer
 # Reference: https://discuss.streamlit.io/t/how-do-i-hide-remove-the-menu-in-production/362/8
 hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            .css-hi6a2p {padding-top: 0rem;}
            .css-1moshnm {visibility: hidden;}
            .css-kywgdc {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """

 # Render the news table first, then inject the style override
 st.markdown(result_str, unsafe_allow_html=True)
 st.markdown(hide_streamlit_style, unsafe_allow_html=True)
diff --git a/libs.py b/libs.py
 import pandas as pd
 import numpy as np
 from bs4 import BeautifulSoup
 import requests as r
 import regex as re
 from dateutil import parser
 import streamlit as st
diff --git a/MainActivity.java b/MainActivity.java
 package com.example.mynews;

 import androidx.appcompat.app.AppCompatActivity;
 import android.os.Bundle;
 import android.view.View;
 import android.webkit.WebSettings;
 import android.webkit.WebView;
 import android.widget.Button;

 /**
  * Single-screen activity that shows a website inside a WebView and offers a
  * Refresh button that reloads the current page.
  */
 public class MainActivity extends AppCompatActivity {
    /** WebView showing the site; held as a field so the click listener can reach it. */
    private WebView webview;

    /**
     * Inflates the layout, enables JavaScript on the WebView, loads the start
     * URL and wires the Refresh button to reload the page.
     *
     * @param savedInstanceState state bundle passed through to the superclass
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Store the view in the (previously unused) field instead of a local,
        // so the anonymous listener below does not depend on local capture.
        webview = (WebView) findViewById(R.id.webview);
        WebSettings webSettings = webview.getSettings();
        webSettings.setJavaScriptEnabled(true);
        webview.loadUrl("https://www.website.com"); /*Replace it with your website URL*/

        Button refresh = (Button) findViewById(R.id.refresh);
        refresh.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                webview.reload();
            }
        });
    }
 }
diff --git a/news_agg.py b/news_agg.py
 def news_agg(rss, timeout=30):
    '''
    Processes each RSS Feed URL passed as an input argument

    rss: str
         RSS feed URL
    timeout: float
         seconds to wait for the feed server before the request is aborted

    Returns
    DataFrame: data frame of data processed from the passed RSS Feed URL
               (columns: title, url, description, date, parsed_date, src,
               elapsed_time, elapsed_time_str); empty when the feed has no <item>
    '''
    # Response from HTTP request. A timeout is set so one unresponsive feed cannot hang the whole run
    resp = r.get(rss,
                 headers = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"},
                 timeout=timeout)
    b = BeautifulSoup(resp.content, "xml") # Parsing the HTTP response
    items = b.find_all("item") # Storing all the news items
    if not items:
        # Nothing to parse: return an empty frame with the usual columns instead of failing on a missing column
        return pd.DataFrame(columns=["title", "url", "description", "date", "parsed_date",
                                     "src", "elapsed_time", "elapsed_time_str"])

    # parsing each news item (<item>); a single concat replaces repeated DataFrame.append (removed in pandas 2.0)
    rss_df = pd.concat([rss_parser(i) for i in items])
    rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan) # Few items have ' NULL' or an empty description so replacing those with NA
    rss_df.dropna(inplace=True)  # dropping news items with either of title, URL, description or date, missing
    rss_df["src"] = src_parse(rss) # extracting the source name from RSS feed URL
    rss_df["elapsed_time"] = rss_df["parsed_date"].apply(date_time_parser) # Computing the time elapsed (in minutes) since the news was published
    rss_df["elapsed_time_str"] = rss_df["elapsed_time"].apply(elapsed_time_str) # Converting the time elapsed (in minutes) into string format
    return rss_df
diff --git a/rss_feed.rss b/rss_feed.rss
 <!-- Skeleton of one RSS news item: title, description, link and publication date. -->
 <item>
 <title>Title of the news item</title>
 <description>Brief description of a news item</description>
 <link>URL of the news article</link>
 <pubDate>Published date of a news article</pubDate>
 </item>
diff --git a/rss_list.py b/rss_list.py
 ########################################
 ######## LIST OF RSS FEED URLs #########
 ########################################
 # One feed URL per line; every URL in this list is fetched and aggregated.
 rss = [
    'https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
    'http://feeds.feedburner.com/ndtvprofit-latest?format=xml',
    'https://www.thehindubusinessline.com/news/feeder/default.rss',
    'https://www.moneycontrol.com/rss/latestnews.xml',
    'https://www.livemint.com/rss/news',
    'https://www.financialexpress.com/feed/',
    'https://www.business-standard.com/rss/latest.rss',
    'https://www.businesstoday.in/rssfeeds/?id=225346',
    'https://www.zeebiz.com/latest.xml/feed',
 ]
diff --git a/rss_parser.py b/rss_parser.py
 def rss_parser(i):
    '''
    Processes an individual news item.

    i: bs4.element.Tag
       single news item (<item>) of an RSS Feed

    Returns
    DataFrame: one-row data frame of a processed news item (title, url, description, date, parsed_date)
    '''
    # Published date used when <pubDate> is absent or unreadable: an old date so that the article appears at the end
    fallback_date = "Sat, 12 Aug 2000 13:39:15 +0530"

    b1 = BeautifulSoup(str(i),"xml") # Parsing a news item (<item>) to BeautifulSoup object

    def _text_of(tag_name, default=""):
        # Text of the first <tag_name> element, or `default` when the element is absent (single lookup per tag)
        tag = b1.find(tag_name)
        return default if tag is None else tag.get_text()

    title = text_clean(_text_of("title")) # cleaned title ("" if <title> is absent)

    url = _text_of("link") # URL of the news article ("" if <link> is absent)

    desc = text_clean(_text_of("description")) # cleaned short description ("" if <description> is absent)
    # limiting the length of description to 300 chars; the ellipsis is added only when text was actually cut off
    desc = f'{desc[:300]}...' if len(desc) > 300 else desc

    date = _text_of("pubDate", fallback_date)

    if "businesstoday.in" in url: # Time zone in the feed of 'businesstoday.in' is wrong, hence, correcting it
        date = date.replace("GMT", "+0530")

    try:
        date1 = parser.parse(date) # parsing the date string to a datetime object
    except (ValueError, OverflowError):
        # A malformed date must not abort the whole aggregation; treat it like a missing date
        date1 = parser.parse(fallback_date)

    # data frame of the processed data
    return pd.DataFrame({"title": title,
                        "url": url,
                        "description": desc,
                        "date": date,
                        "parsed_date": date1}, index=[0])
diff --git a/src_parse.py b/src_parse.py
 def src_parse(rss):
    '''
    Returns the source (root domain of RSS feed) from the RSS feed URL.

    rss: str
         RSS feed URL (any scheme, with or without a leading "www.")

    Returns
    str: root domain of RSS feed URL
    '''
    # RSS feed URL of NDTV profit (http://feeds.feedburner.com/ndtvprofit-latest?format=xml) doesn't contain NDTV's root domain
    if 'ndtvprofit' in rss:
        return 'ndtv profit'

    # Drop the scheme ("https://", "http://", ...) if there is one
    _, scheme_sep, remainder = rss.partition("://")
    if not scheme_sep:
        remainder = rss
    host = remainder.split("/")[0] # everything before the first '/' is the host name
    if host.startswith("www."):
        host = host[len("www."):] # removing the "www." prefix
    return host
diff --git a/strings.xml b/strings.xml
 <!-- String resources of the app. -->
 <resources>
    <!-- Referenced as @string/app_name. -->
    <string name="app_name">News</string>
    <!-- Credits text; marked translatable="false", i.e. excluded from translation. -->
    <string name="app_description" translatable="false">News Aggregator developed by KSV Muralidhar using Streamlit and Heroku</string>
    <string name="developer">KSV Muralidhar</string>
 </resources>
diff --git a/text_clean.py b/text_clean.py
 def text_clean(desc):
    '''
    Returns cleaned text by removing the unparsed HTML characters from a news item's description/title

    desc: str
        description/title of a news item

    Returns
    str: cleaned description/title of a news item
    '''
    # Un-escape angle brackets first so that escaped tags are stripped together with literal ones
    desc = desc.replace("&lt;", "<").replace("&gt;", ">")
    desc = re.sub(r"<.*?>", "", desc) # Removing HTML tags from the description/title
    # Numeric entities are matched with or without the leading '&' so that no stray '&' is left behind
    desc = re.sub(r"&?#39;", "'", desc)
    desc = desc.replace('&quot;', '"')
    desc = desc.replace('&nbsp;', ' ') # a non-breaking space is a space, not a quote
    desc = re.sub(r"&?#32;", " ", desc)
    return desc
diff --git a/themes.xml b/themes.xml
 <resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme: Material Components DayNight with a dark action bar.
         Primary and secondary colours are black, with white for content drawn on top. -->
    <style name="Theme.MyNews" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->

        <item name="colorPrimary">@color/black</item>
        <item name="colorPrimaryVariant">@color/black</item>
        <item name="colorOnPrimary">@color/white</item>

        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/black</item>
        <item name="colorSecondaryVariant">@color/black</item>
        <item name="colorOnSecondary">@color/white</item>
        <!-- Status bar color: follows colorPrimaryVariant. -->
        <item name="android:statusBarColor" tools:targetApi="l">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
    
 </resources>
	<?xml version="1.0" encoding="utf-8"?>

	<!-- Main screen: a full-width Refresh button pinned to the bottom edge, with
	     the WebView filling the space above it. -->
	<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
	    xmlns:app="http://schemas.android.com/apk/res-auto"
	    xmlns:tools="http://schemas.android.com/tools"
	    android:layout_width="match_parent"
	    android:layout_height="match_parent"
	    tools:context=".MainActivity">

	    <!-- Bounded by the button below so the button never covers page content. -->
	    <WebView
	        android:id="@+id/webview"
	        android:layout_width="match_parent"
	        android:layout_height="match_parent"
	        android:layout_above="@+id/refresh"
	        />

	    <!-- match_parent replaces the deprecated fill_parent (identical meaning). -->
	    <Button
	        android:layout_width="match_parent"
	        android:layout_height="wrap_content"
	        android:layout_alignParentBottom="true"
	        android:layout_alignParentLeft="true"
	        android:layout_alignParentStart="true"
	        android:text="Refresh"
	        android:id="@+id/refresh" />

	</RelativeLayout>
	<?xml version="1.0" encoding="utf-8"?>
	<!-- Manifest for the com.example.mynews app: one launcher activity plus network access. -->
	<manifest xmlns:android="http://schemas.android.com/apk/res/android"
	    package="com.example.mynews">

	    <!-- Network access; MainActivity loads a remote URL into its WebView. -->
	    <uses-permission android:name="android.permission.INTERNET" />

	    <application
	        android:allowBackup="true"
	        android:icon="@mipmap/ic_launcher"
	        android:label="@string/app_name"
	        android:roundIcon="@mipmap/ic_launcher_round"
	        android:supportsRtl="true"
	        android:hardwareAccelerated="true"
	        android:theme="@style/Theme.MyNews">
	        <!-- Entry-point activity. configChanges is a '|'-separated flag list
	             (the stray backslash before '|' made the value invalid). -->
	        <activity
	            android:name=".MainActivity"
	            android:configChanges="orientation|screenSize"
	            android:exported="true">
	            <!-- MAIN + LAUNCHER: this activity is the app's launcher entry. -->
	            <intent-filter>
	                <action android:name="android.intent.action.MAIN" />

	                <category android:name="android.intent.category.LAUNCHER" />
	            </intent-filter>
	        </activity>
	    </application>

	</manifest>
	def date_time_parser(dt):
	    '''
	    Returns the time elapsed (in minutes) since the news was published

	    dt: datetime.datetime or pandas.Timestamp
	        published date (timezone-aware or naive)

	    Returns
	    int: time elapsed (in minutes), rounded to the nearest minute
	    '''
	    # `tzinfo` is available on both datetime and Timestamp objects (`tz` is
	    # Timestamp-only). Taking "now" in the same zone as `dt` keeps the
	    # subtraction valid for aware and naive values alike.
	    now = dt.now(dt.tzinfo)
	    return int(round((now - dt).total_seconds() / 60))
	def elapsed_time_str(mins):
	    '''
	    Returns the word form of the time elapsed (in minutes) since the news was published

	    mins: int
	        time elapsed (in minutes); zero or negative values (items dated in the
	        future, e.g. because of a wrong time zone in the feed) are reported as 'Just in'

	    Returns
	    str: word form of time elapsed, e.g. 'Just in', '5 minutes ago',
	         'an hour and 5 mins ago', '4 hours ago', 'a day ago', '1.2 days ago'
	    '''
	    if mins <= 0:
	        return 'Just in'

	    hours = int(mins / 60) # integer part of hours. Example: if time elapsed is 2.5 hours, then hours = 2
	    days = np.round(mins / (60 * 24), 1) # days elapsed, to one decimal place
	    # minutes portion of time elapsed in hours. Example: if time elapsed is 2.5 hours, then remaining_mins = 30
	    remaining_mins = int(mins - (hours * 60))

	    # The day count is rounded before the comparison, so anything that rounds up to 1.0 already reads as a day
	    if days >= 1:
	        return 'a day ago' if days == 1 else f'{str(days)} days ago'

	    if mins >= 60:
	        hour_part = 'an hour' if hours == 1 else f'{str(hours)} hours'
	        if remaining_mins == 0:
	            return f'{hour_part} ago' # Example: an hour ago / 4 hours ago
	        min_part = 'a min' if remaining_mins == 1 else f'{str(remaining_mins)} mins'
	        return f'{hour_part} and {min_part} ago' # Example: 2 hours and 15 mins ago

	    return 'a minute ago' if mins == 1 else f'{str(mins)} minutes ago'
	# Collect the news items from all the RSS Feed URLs into one data frame.
	# pd.concat is used because DataFrame.append was removed in pandas 2.0
	# (and copied the whole frame on every iteration).
	final_df = pd.concat([news_agg(feed_url) for feed_url in rss])

	final_df.sort_values(by='elapsed_time', inplace=True) # Sorting the news items by the time elapsed (in minutes) since the news was published
	# concatenating the source and the string format of the elapsed time; the separator is five
	# HTML non-breaking spaces, since plain spaces would collapse to one when rendered as HTML
	final_df['src_time'] = final_df['src'] + ('&nbsp;' * 5) + final_df['elapsed_time_str']
	final_df.drop(columns=['date', 'parsed_date', 'src', 'elapsed_time', 'elapsed_time_str'], inplace=True) # only title, url, description and src_time are kept
	final_df.drop_duplicates(subset='description', inplace=True) # Dropping news items with duplicate descriptions (first, i.e. most recent, occurrence is kept)
	final_df = final_df.loc[(final_df['title'] != ''), :].copy() # Dropping news items with no title
	#################################################
	############# FRONT END HTML SCRIPT ##############
	#################################################
	# The page is one HTML table: a spacer row first, then for every news item a
	# linked title, a linked description, a linked "source + elapsed time" line
	# and another spacer row. Fragments are gathered in a list and joined once.
	# NOTE(review): title/description/url come from external feeds and are
	# inserted without escaping while unsafe_allow_html=True - confirm this is acceptable.
	html_parts = ['<html><table style="border: none;"><tr style="border: none;"><td style="border: none; height: 10px;"></td></tr>']
	for _, row in final_df.iterrows(): # one iteration per news item
	    href = row["url"]
	    description = row["description"]
	    url_txt = row["title"]
	    src_time = row["src_time"]

	    html_parts.append(
	        f'<a href="{href}" target="_blank" style="background-color: whitesmoke; display: block; height:100%; text-decoration: none; color: black; line-height: 1.2;">'
	        f'<tr style="align:justify; border-left: 5px solid transparent; border-top: 5px solid transparent; border-bottom: 5px solid transparent; font-weight: bold; font-size: 18px; background-color: whitesmoke;">{url_txt}</tr></a>'
	        f'<a href="{href}" target="_blank" style="background-color: whitesmoke; display: block; height:100%; text-decoration: none; color: dimgray; line-height: 1.25;">'
	        f'<tr style="align:justify; border-left: 5px solid transparent; border-top: 0px; border-bottom: 5px solid transparent; font-size: 14px; padding-bottom:5px;">{description}</tr></a>'
	        f'<a href="{href}" target="_blank" style="background-color: whitesmoke; display: block; height:100%; text-decoration: none; color: black;">'
	        f'<tr style="border-left: 5px solid transparent; border-top: 0px; border-bottom: 5px solid transparent; color: green; font-size: 11px;">{src_time}</tr></a>'
	        '<tr style="border: none;"><td style="border: none; height: 10px;"></td></tr>'
	    )

	html_parts.append('</table></html>')
	result_str = ''.join(html_parts)

	# CSS that hides the Streamlit menu and footer
	# Reference: https://discuss.streamlit.io/t/how-do-i-hide-remove-the-menu-in-production/362/8
	hide_streamlit_style = """
	            <style>
	            #MainMenu {visibility: hidden;}
	            .css-hi6a2p {padding-top: 0rem;}
	            .css-1moshnm {visibility: hidden;}
	            .css-kywgdc {visibility: hidden;}
	            footer {visibility: hidden;}
	            </style>
	            """

	# Render the news table first, then inject the style override
	st.markdown(result_str, unsafe_allow_html=True)
	st.markdown(hide_streamlit_style, unsafe_allow_html=True)
	import pandas as pd
	import numpy as np
	from bs4 import BeautifulSoup
	import requests as r
	import regex as re
	from dateutil import parser
	import streamlit as st
	package com.example.mynews;

	import androidx.appcompat.app.AppCompatActivity;
	import android.os.Bundle;
	import android.view.View;
	import android.webkit.WebSettings;
	import android.webkit.WebView;
	import android.widget.Button;

	public class MainActivity extends AppCompatActivity {
	private WebView webview;

	@Override
	protected void onCreate(Bundle savedInstanceState) {
	super.onCreate(savedInstanceState);
	setContentView(R.layout.activity_main);

	WebView myWebView = (WebView) findViewById(R.id.webview);
	WebSettings webSettings = myWebView.getSettings();
	webSettings.setJavaScriptEnabled(true);
	myWebView.loadUrl("https://www.website.com"); /Replace it with your website URL/

	Button refresh = (Button) findViewById(R.id.refresh);
	refresh.setOnClickListener(new View.OnClickListener() {
	public void onClick(View v) {
	myWebView.reload();
	}
	});
	}
	}
	def news_agg(rss, timeout=30):
	    '''
	    Processes each RSS Feed URL passed as an input argument

	    rss: str
	         RSS feed URL
	    timeout: float
	         seconds to wait for the feed server before the request is aborted

	    Returns
	    DataFrame: data frame of data processed from the passed RSS Feed URL
	               (columns: title, url, description, date, parsed_date, src,
	               elapsed_time, elapsed_time_str); empty when the feed has no <item>
	    '''
	    # Response from HTTP request. A timeout is set so one unresponsive feed cannot hang the whole run
	    resp = r.get(rss,
	                 headers = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"},
	                 timeout=timeout)
	    b = BeautifulSoup(resp.content, "xml") # Parsing the HTTP response
	    items = b.find_all("item") # Storing all the news items
	    if not items:
	        # Nothing to parse: return an empty frame with the usual columns instead of failing on a missing column
	        return pd.DataFrame(columns=["title", "url", "description", "date", "parsed_date",
	                                     "src", "elapsed_time", "elapsed_time_str"])

	    # parsing each news item (<item>); a single concat replaces repeated DataFrame.append (removed in pandas 2.0)
	    rss_df = pd.concat([rss_parser(i) for i in items])
	    rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan) # Few items have ' NULL' or an empty description so replacing those with NA
	    rss_df.dropna(inplace=True) # dropping news items with either of title, URL, description or date, missing
	    rss_df["src"] = src_parse(rss) # extracting the source name from RSS feed URL
	    rss_df["elapsed_time"] = rss_df["parsed_date"].apply(date_time_parser) # Computing the time elapsed (in minutes) since the news was published
	    rss_df["elapsed_time_str"] = rss_df["elapsed_time"].apply(elapsed_time_str) # Converting the time elapsed (in minutes) into string format
	    return rss_df
	<!-- Skeleton of one RSS news item: title, description, link and publication date. -->
	<item>
	<title>Title of the news item</title>
	<description>Brief description of a news item</description>
	<link>URL of the news article</link>
	<pubDate>Published date of a news article</pubDate>
	</item>