# General
import datetime as dt
import pandas as pd
import re
import datetime
import json

# Scraping
import bs4 as bs
import requests

# Plotting
import plotly.io as pio
import plotly.graph_objects as go
import dash_core_components as dcc

# Database
import psycopg2

from .constants import DB_HOST, DB_PASS, DB_NAME, DB_USER

def get_lat(x):
    """
    Return the latitude stored in the GeoJSON string found under the 'st_asgeojson' key of x.

    :param x: mapping (e.g. a dataframe row) whose 'st_asgeojson' entry is a GeoJSON string
    :return: the second element of the GeoJSON 'coordinates' array
    """
    coordinates = json.loads(x['st_asgeojson'])['coordinates']
    return coordinates[1]

def get_long(x):
    """
    Return the longitude stored in the GeoJSON string found under the 'st_asgeojson' key of x.

    :param x: mapping (e.g. a dataframe row) whose 'st_asgeojson' entry is a GeoJSON string
    :return: the first element of the GeoJSON 'coordinates' array
    """
    coordinates = json.loads(x['st_asgeojson'])['coordinates']
    return coordinates[0]

def get_closest_ground_station_historical_data(lat, long, timestamp, pollutant):
    """
    Function that extracts pollutant measurements from the database for the nearest ground station based on location,
    time and desired pollutant.

    The three stations closest to the location that are flagged as having historical data for the pollutant are tried
    in order of proximity; the first one for which at most half of the values in the period are null is used.

    :param lat: float, Latitude
    :param long: float, Longitude
    :param timestamp: datetime timestamp; measurements are selected from 8 days before until 8 days after it
    :param pollutant: string, string indicating the pollutant for which data has to be extracted. Available pollutants:
    co_conc, o3_conc, no2_conc, pm2p5_conc, pm10_conc, so2_conc, no_conc.
    :return: list [df_data, distance, name] where df_data is a dataframe with the columns 'datetime' and
    'ground_station_data', distance is the distance to the selected ground station in meters and name is the official
    EPA name of the ground station (or a title-cased data name if no EPA name is available). None is returned when
    none of the candidate stations has enough data.
    :raises KeyError: if pollutant is not one of the available pollutants
    """

    pollutant_columns = {  # dict with the pollutants names as used for historical data in the database
        'co_conc': 'co',
        'o3_conc': 'o3',
        'no2_conc': 'no2',
        'pm2p5_conc': 'pm25',
        'pm10_conc': 'pm10',
        'so2_conc': 'so2',
        'no_conc': 'no',
    }

    ppb_to_micro = {  # some data needs to be converted from ppb, this dict contains the conversion factors
        'o3_conc': 2,
        'no2_conc': 1.88,
        'no_conc': 1.25,
        'co_conc': 1000,
    }

    # The lookup doubles as validation: only whitelisted pollutant names end up in the SQL identifiers below
    pollutant_col = pollutant_columns[pollutant]

    # Set the date range for the period during which the fire event took place (truncated to the hour)
    begin_fire_event = timestamp - dt.timedelta(days=8)
    end_fire_event = timestamp + dt.timedelta(days=8)
    date_params = {
        'start': begin_fire_event.strftime('%Y-%m-%d %H:00:00'),
        'end': end_fire_event.strftime('%Y-%m-%d %H:00:00'),
    }

    # Connect to the database
    conn = psycopg2.connect(dbname=DB_NAME, user=DB_USER, password=DB_PASS, host=DB_HOST)

    try:  # make sure the connection is closed, also when one of the queries fails
        df = pd.read_sql_query(  # selects the three closest ground stations to the location of the fire event
            f"""SELECT  *,
                        ST_Distance(ST_SetSRID(ST_Point(%(long)s, %(lat)s), 4326), ST_SetSRID(geometry, 4326), true) AS distance
                        FROM ground_stations
                        WHERE (ground_stations.{pollutant_col}_hist = true)
                        ORDER BY ST_SetSRID(geometry, 4326)  <-> ST_SetSRID(ST_Point(%(long)s, %(lat)s), 4326)
                        LIMIT 3;""",
            conn,
            params={'long': float(long), 'lat': float(lat)})  # coordinates are bound by the driver, not interpolated

        for _, row in df.iterrows():  # go over each of the closest ground stations

            # make some adjustments to fit ground station column names used in the database
            col_name = row['data_name'].lower().replace(' ', '_').replace("'", '')
            distance = row['distance']  # distance from the fire event in meters

            if row['epa_name'] is not None:
                name = row['epa_name']  # use the official EPA name if available (taken from AirQuality.ie)
            else:
                # if no official EPA name is available use the data name
                name = row['data_name'].replace('_', ' ').title()

            df_data = pd.read_sql_query(  # Select the data for the station for the fire event period
                f"""SELECT datetime, {col_name} FROM ground_meas_{pollutant}
                        WHERE datetime BETWEEN %(start)s::timestamp
                     AND %(end)s::timestamp;""",
                conn,
                params=date_params)

            if df_data.empty:  # if no data was retrieved try the next ground station
                continue

            null_fraction = df_data[col_name].isnull().sum() / len(df_data[col_name])
            if null_fraction > .5:  # more than 50% of the values are null, try the next ground station
                continue

            df_data = df_data.rename(columns={col_name: 'ground_station_data'})

            if pollutant in ppb_to_micro:  # apply conversion if needed
                df_data['ground_station_data'] = df_data['ground_station_data'] * ppb_to_micro[pollutant]

            return [df_data, distance, name]

        return None  # if no data was retrieved return None
    finally:
        conn.close()


def get_closest_active_epa_ground_station(lat, long, pollutant):
    """
        Function that extracts closest active EPA ground station

        :param lat: float, Latitude
        :param long: float, Longitude
        :param pollutant: string, string indicating the pollutant for which data has to be extracted. Available pollutants:
        co_conc, o3_conc, no2_conc, pm2p5_conc, pm10_conc, so2_conc, no_conc.
        :return: dataframe containing details about the closest ground station (at most one row, including a
        'distance' column computed with ST_Distance), or None when pollutant is 'no_conc'.
        :raises KeyError: if pollutant is not one of the available pollutants
    """

    if pollutant == 'no_conc':  # no station lookup is done for NO
        return None

    pollutant_columns = {  # dict with the pollutants names as used in the ground_stations table
        'co_conc': 'co',
        'o3_conc': 'o3',
        'no2_conc': 'no2',
        'pm2p5_conc': 'pm25',
        'pm10_conc': 'pm10',
        'so2_conc': 'so2',
        'no_conc': 'no',
    }

    # The lookup doubles as validation: only whitelisted column names are interpolated into the SQL below
    pollutant_col = pollutant_columns[pollutant]

    conn = psycopg2.connect(dbname=DB_NAME, user=DB_USER, password=DB_PASS, host=DB_HOST)  # connect to the database

    try:  # make sure the connection is closed, also when the query fails
        return pd.read_sql_query(  # Select the available information for the closest ground station
            f"""SELECT  *,
                        ST_Distance(ST_SetSRID(ST_Point(%(long)s, %(lat)s), 4326), ST_SetSRID(geometry, 4326), true) AS distance
                        FROM ground_stations
                        WHERE ground_stations.{pollutant_col} = true
                        ORDER BY ST_SetSRID(geometry, 4326)  <-> ST_SetSRID(ST_Point(%(long)s, %(lat)s), 4326)
                        LIMIT 1;""",
            conn,
            params={'long': float(long), 'lat': float(lat)})  # coordinates are bound by the driver, not interpolated
    finally:
        conn.close()


def get_epa_data(df, timestamp, pollutant):
    """
            Function that scrapes pollutant concentration data from AirQuality.ie for a given ground station and
            time period

            NOTE: scraping is currently switched off. After the request url and headers have been built the
            function returns None unconditionally, so the request/parsing code below that return is never executed.

            :param df: dataframe containing the details of the ground station for which the data is to be scraped
            (the 'epa_code' of its first row is used)
            :param timestamp: datetime timestamp; the requested period runs from 7 days before to 7 days after it
            :param pollutant: string, string indicating the pollutant for which data has to be extracted.
            Available pollutants: co_conc, o3_conc, no2_conc, pm2p5_conc, pm10_conc, so2_conc.
            :return: None while scraping is disabled. With the early return removed: dataframe containing pollution
            data scraped from AirQuality.ie, or None if no data could be extracted.
        """

    pollutants = {  # dict with the pollutants names as used in the series scraped from the page
        'co_conc': 'CO',
        'o3_conc': 'O3',
        'no2_conc': 'NO2',
        'pm2p5_conc': 'PM25',
        'pm10_conc': 'PM10',
        'so2_conc': 'SO2',
    }

    # Set the date range for the period during which the fire event took place
    begin_fire_event = timestamp - dt.timedelta(days=7)
    end_fire_event = timestamp + dt.timedelta(days=7)

    start_date_string = begin_fire_event.strftime('%d+%b+%Y')  # Convert to string to place in the url
    end_date_string = end_fire_event.strftime('%d+%b+%Y')

    epa_code = df['epa_code'].iloc[0]  # code of the ground station

    # construct url to scrape
    url = f"https://airquality.ie/readings?station={epa_code}&dateFrom={start_date_string}&dateTo={end_date_string}"

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept-Encoding': 'identity',
    }

    # NOTE(review): this unconditional return disables the scraper; everything below is unreachable until it is
    # removed. It is kept because callers currently always receive None from this function.
    return None

    page = requests.get(url, headers=headers)  # request the page

    soup = bs.BeautifulSoup(page.text, 'lxml')  # parse the response

    graph = soup.findAll(
        'script',
        attrs={
            'type': 'text/javascript'}
    )  # find all the elements of text/javascript type

    if len(graph) == 0:
        return None  # no elements were found, so no data available

    # Currently the data shown in the graph is found in the last text/javascript element
    test = bs.BeautifulSoup(graph[-1].contents[0], 'lxml')

    # Normalise the javascript once: strip newlines and spaces, use double quotes and python's None for null
    script_text = test.text.replace(
        "\n", "").replace(
        "'", '"').replace(
        " ", "").replace("null", "None")

    # Use regex to select the pollutant data from the javascript parsed from the page
    # (raw strings, so the regex escapes are not treated as invalid string escapes)
    names = re.findall(r'name:"([A-Za-z0-9.]*)"', script_text, re.DOTALL)
    data_columns = re.findall(r"data:(\[[^/]*)//endforeach]", script_text, re.DOTALL)

    data = {}
    for name, column in zip(names, data_columns):  # remove some unwanted JS and convert string to python list
        # NOTE(security): eval() runs text scraped from a remote page; replace with a real parser
        # (e.g. ast.literal_eval) before re-enabling the scraper.
        data[name] = eval(column.replace('Date.UTC', '') + ']')

    for key in data:  # construct a dictionary mapping the (noon) datetime of each reading to its value
        # the month is incremented by one because the scraped Date.UTC months are zero-based
        data[key] = {datetime.datetime(year=x[0][0], month=(x[0][1]) + 1, day=x[0][2], hour=12): x[1] for x in
                     data[key]}

    try:
        # convert to pandas dataframe, convert datatype to float and return the df
        pol_name = pollutants[pollutant]
        data = pd.DataFrame(index=list(data[pol_name].keys()), data=list(data[pol_name].values()), columns=[pol_name])
        data[pol_name] = data[pol_name].astype(float)
        if pol_name == 'CO':
            data[pol_name] = data[pol_name] * 1000  # CO values need to be converted, are given in mg, not microgram

        return data
    except (KeyError, ValueError, TypeError):
        # pollutant not supported / not present on the page, or values that cannot be converted to float
        return None


def _station_trace_name(name, distance):
    """
    Build the legend label for a ground station trace.

    :param name: name of the ground station
    :param distance: distance to the ground station in meters, or None if unknown
    :return: str, label with the distance shown in KM above 1000 meters and in M otherwise
    """
    if distance is None:  # no distance known, leave it out of the label instead of failing on the comparison
        return f'Nearest Ground Measurement Station ({name})'
    if distance > 1000:
        return f'Nearest Ground Measurement Station ({name}, distance: {round(distance / 1000)}KM)'
    return f'Nearest Ground Measurement Station ({name}, distance: {round(distance)}M)'


def create_plot(pollutant, df, timestamp, name=None, distance=None, epa_data=None):
    """
    Creates a graph combining the data from different data sources to give an overview of the pollution
    concentration observed during the fire event
    :param pollutant: str, pollutant being analysed
    :param df: dataframe containing the pollution data taken from the database; must contain the columns
    'values_exact_loc' and 'temporal_baseline', and may contain 'ground_station_data'
    :param timestamp: timestamp of when the fire was first observed
    :param name: name of the nearest ground station, if available
    :param distance: distance to the nearest ground station in meters, if available
    :param epa_data: data scraped from AirQuality.ie, if available (its first column is plotted)
    :return: dcc.Graph with id 'fe_plot' wrapping the plotly figure
    """

    pollutant_columns = {  # dict with the pollutants names as used for naming purposes
        'co_conc': 'CO',
        'o3_conc': 'O<sub>3</sub>',
        'no2_conc': 'NO<sub>2</sub>',
        'pm2p5_conc': 'PM25',
        'pm10_conc': 'PM10',
        'so2_conc': 'SO<sub>2</sub>',
        'no_conc': 'NO',
    }

    fig = go.Figure()  # init a plotly figure
    fig.update_layout(template=pio.templates["plotly_dark"])  # use the standard plotly dark template
    fig.update_layout(xaxis_title="Date",
                      yaxis_title=f"{pollutant_columns[pollutant]} Concentration µg m<sup>-3</sup>")  # axis titles

    fig.update_layout(legend=dict(  # position the legend
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01))

    colors = ["#29bf12", "#abff4f", "#08bdbd", "#f21b3f", "#ff9914"]

    fig.add_trace(go.Scatter(  # add the concentration data closest to the fire event
        x=df.index,
        y=df['values_exact_loc'],
        mode='lines',
        name='Exact Location',
        line={'color': colors[3]},
    ))

    fig.add_trace(go.Scatter(  # add the baseline values
        x=df.index,
        y=df['temporal_baseline'],
        mode='lines',
        name='Average Concentration Levels over for the same period (2015-2020)',
        line={'color': colors[1]},
    ))

    # The label is computed once from the untouched distance in meters, so both ground station traces get the
    # same, correct unit (rounding the distance in place would turn e.g. 5432 m into "5M" on the second trace).
    station_trace_name = _station_trace_name(name, distance)

    if epa_data is not None:  # Add trace with ground measurements, if EPA data is available
        fig.add_trace(go.Scatter(
            x=epa_data.index,
            y=epa_data[epa_data.columns[0]],
            mode='lines',
            name=station_trace_name,
            line={'color': colors[2]}))

    if 'ground_station_data' in df.columns:  # Add trace with historical ground station data, if available
        fig.add_trace(go.Scatter(
            x=df.index,
            y=df['ground_station_data'],
            mode='lines',
            name=station_trace_name,
            line={'color': colors[2]}))

    # calculate the min and max values present within the datasets to set the range of the y-axis
    max_val = df.max().max()
    min_val = df.min().min()
    if epa_data is not None:
        epa_values = epa_data[epa_data.columns[0]]
        max_val = max(max_val, epa_values.max())
        min_val = min(min_val, epa_values.min())

    val_range = max_val - min_val
    offset = val_range * 0.35  # small offset to prevent the plot from being too packed together

    fig.update_layout(yaxis_range=[min_val - offset, max_val + offset])

    # add a rectangle to illustrate the observation period related to the fire (24 hours on either side)
    begin_fire_event = timestamp - dt.timedelta(hours=24)
    end_fire_event = timestamp + dt.timedelta(hours=24)
    fig.add_vrect(x0=begin_fire_event, x1=end_fire_event,
                  fillcolor="orange", opacity=0.25, line_width=0)
    # add text to indicate that the rectangle illustrates the fire, centred on the rectangle below the data
    fig.add_annotation(text='Fire', x=begin_fire_event + dt.timedelta(hours=24), y=min_val - (offset * 0.5),
                       showarrow=False)

    # remove the margins
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # return the figure as a dcc graph object
    return dcc.Graph(id='fe_plot', figure=fig)
