# Author: Rubinigg Michael
# Contributor: Rubinigg Michael
# Abstract: Parses all dates in a column of a CSV file
# Version 1.0
# Release date: 2024-10-31
# Place: Graz, Austria, EU
# Company: dr Mag. rer. nat. Michael Rubinigg
# Programming Language: Python 3.11
# Rights:  Creative Commons Attribution 4.0 (CC-BY)

import pandas as pd
from dateutil import parser
import tkinter as tk
from tkinter import filedialog, simpledialog, ttk
import os

def parse_dates(selected_file, output_directory, output_filename, date_column, separator_input):

    output_path = os.path.join(output_directory, output_filename + '.csv')

    # create output directory if it doesn't exist
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    # Load the CSV file into a DataFrame
    df = pd.read_csv(selected_file, sep=separator_input)

    # Check if the date column exists in the DataFrame
    if date_column not in df.columns:
        print(f"Column '{date_column}' not found in the CSV file.")
        return

    # Parse the date column and convert to desired format
    def parse_date(value):
        try:
            # Use dateutil's parser to handle various date formats
            parsed_date = parser.parse(str(value))
            # Convert to string format with time if available, otherwise just date
            if parsed_date.time() == parsed_date.time().min:
                return parsed_date.strftime("%Y-%m-%d")
            else:
                return parsed_date.strftime("%Y-%m-%d %H:%M:%S")
        except Exception as e:
           return value

    # Apply the date parsing function to the specified column and replace missing values with NULL
    df[date_column] = df[date_column].apply(parse_date)
    df.fillna('NULL', inplace=True)

    # Save the updated DataFrame to a new CSV file
    df.to_csv(output_path, sep=';', index=False)

def select_file(title_text):
    file_path = filedialog.askopenfilename(
        title = title_text,
        filetypes= [('CSV files', '*.csv')],
    )
    return file_path

def select_directory(title_text):
    output_directory = filedialog.askdirectory(
        title = title_text
    )
    return output_directory

def get_text(title_text, prompt_text):
    output_text = simpledialog.askstring(
        title = title_text,
        prompt = prompt_text
    )
    return output_text

def get_listvalue(options, title_text):

    # Create a main window
    root = tk.Tk()
    root.attributes('-topmost', True)
    root.eval('tk::PlaceWindow . center')

    # create label
    label = ttk.Label(text=title_text)
    label.pack(padx=5, pady=5)

    # Create a StringVar to hold the selected value
    selected_value = tk.StringVar()

    # Create a combobox
    combobox = ttk.Combobox(root, textvariable=selected_value)
    combobox['values'] = options
    combobox.current(0)  # Set the default selection to the first item
    combobox.pack(padx=10, pady=10)

    # Function to handle the button click
    def on_submit():
        root.destroy()  # Close the window

    # Create a submit button
    submit_button = tk.Button(root, text="Submit", command=on_submit)
    submit_button.pack(padx=10, pady=10)

    # Run the application
    root.mainloop()

    # Return the selected value
    return selected_value.get()

def main ():

    # select source file
    selected_file = select_file('Select a source file for the data')

    # select output directory
    selected_output_directory = select_directory('Select Output Directory')

    # Get user-defined output filename
    output_filename = get_text('Name of the output file', "Enter the output filename (excluding extension):")

    # Get user-defined output filename
    date_column = get_text('Name of the column', "Enter the column name to be parsed:")

    # get the column delimiter character
    separator_input = get_listvalue([',', ';'], 'Select the column separator in the input file')

    parse_dates(selected_file, selected_output_directory, output_filename, date_column, separator_input)

if __name__ == '__main__':
    main()