Create Dash tool to upload, filter and plot data

I want to build a tool with Dash that lets me upload a data file, filter/select the data I want, and plot the selected data. To do this, I tried to combine two examples from the documentation: one for uploading and plotting data, and one for filtering data.

The code for uploading and plotting data:

import base64
import datetime
import io
import sys
import plotly.graph_objs as go
import cufflinks as cf

import dash
from dash.dependencies import Input, Output, State
from dash import dcc
from dash import html
from dash import dash_table

import pandas as pd

# External CSS passed to the Dash constructor below.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# The Dash application and its underlying server object.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
server = app.server

# Colour palette used when building figures.
colors = dict(
    graphBackground="#F5F5F5",
    background="#ffffff",
    text="#000000",
)

# Page layout: an upload area, the graph, and a container for the data table.
app.layout = html.Div(children=[
    dcc.Upload(
        id='upload-data',
        children=html.Div(children=[
            'Drag and Drop or ',
            html.A(children='Select Files'),
        ]),
        style=dict(
            width='100%',
            height='60px',
            lineHeight='60px',
            borderWidth='1px',
            borderStyle='dashed',
            borderRadius='5px',
            textAlign='center',
            margin='10px',
        ),
        # Allow multiple files to be uploaded
        multiple=True,
    ),
    dcc.Graph(id='Mygraph'),
    html.Div(id='output-data-upload'),
])

def parse_data(contents, filename):
    """Decode an uploaded file into a pandas DataFrame.

    Parameters
    ----------
    contents : str
        Upload payload of the form ``"<content type>,<base64 data>"``.
    filename : str
        Name of the uploaded file; it selects the parser (matched
        case-insensitively).

    Returns
    -------
    pandas.DataFrame or html.Div
        The parsed data, or an ``html.Div`` carrying an error message when
        the payload is malformed, the file type is unsupported, or parsing
        fails.
    """
    try:
        # Split on the first comma only: everything after it is the
        # base64-encoded file body.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        name = filename.lower()

        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an Excel file
            df = pd.read_excel(io.BytesIO(decoded))
        elif 'txt' in name or 'tsv' in name:
            # Assume that the user uploaded a whitespace-delimited text file.
            # Each substring is tested explicitly: `'txt' or 'tsv' in name`
            # is always truthy and would swallow every other file type.
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')), delimiter=r'\s+')
        else:
            raise ValueError(f'Unsupported file type: {filename!r}')
    except Exception as e:
        # Any failure is reported to the user instead of crashing the app.
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])

    return df

@app.callback(Output('Mygraph', 'figure'), [
    Input('upload-data', 'contents'),
    Input('upload-data', 'filename')
])
def update_graph(contents, filename):
    """Plot ``y_coordinate`` against ``time`` for the first uploaded file.

    Parameters
    ----------
    contents : list of str or None
        Upload payloads, one per uploaded file; falsy before any upload.
    filename : list of str or None
        File names matching ``contents``.

    Returns
    -------
    plotly.graph_objs.Figure
        A line+marker scatter plot. The plot is empty when nothing has been
        uploaded, the file could not be parsed, or the expected columns are
        missing.
    """
    t = []
    y = []
    if contents:
        # Only the first uploaded file is plotted.
        df = parse_data(contents[0], filename[0])
        # parse_data returns an error component rather than a DataFrame when
        # it fails, and an arbitrary upload may lack the expected columns;
        # in both cases fall back to an empty plot instead of raising.
        if isinstance(df, pd.DataFrame) and {'time', 'y_coordinate'} <= set(df.columns):
            t = df['time']
            y = df['y_coordinate']

    fig = go.Figure(
        data=[
            go.Scatter(
                x=t,
                y=y,
                mode='lines+markers')
        ],
        layout=go.Layout(
            plot_bgcolor=colors["graphBackground"],
            paper_bgcolor=colors["graphBackground"]
        ))

    return fig

@app.callback(Output('output-data-upload', 'children'),
              [
                  Input('upload-data', 'contents'),
                  Input('upload-data', 'filename')
              ])
def update_table(contents, filename):
    """Render the first uploaded file as a table plus a raw-content preview.

    Parameters
    ----------
    contents : list of str or None
        Upload payloads, one per uploaded file; falsy before any upload.
    filename : list of str or None
        File names matching ``contents``.

    Returns
    -------
    html.Div
        An empty ``Div`` before any upload, the error component produced by
        ``parse_data`` when the file could not be read, or a ``Div`` holding
        the file name, a ``DataTable`` and the first 200 characters of the
        raw payload.
    """
    if not contents:
        return html.Div()

    # Only the first uploaded file is shown.
    contents = contents[0]
    filename = filename[0]
    df = parse_data(contents, filename)

    if not isinstance(df, pd.DataFrame):
        # parse_data hands back an error component on failure; display it
        # rather than failing on `df.columns`.
        return df

    return html.Div([
        html.H5(filename),
        dash_table.DataTable(
            # 'records' is the supported orient name for a list of row dicts
            # ('rows' is not accepted by current pandas).
            data=df.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in df.columns]
        ),
        html.Hr(),
        html.Div('Raw Content'),
        html.Pre(contents[0:200] + '...', style={
            'whiteSpace': 'pre-wrap',
            'wordBreak': 'break-all'
        })
    ])


# Launch the app in debug mode when this file is run as a script.
if __name__ == "__main__":
    app.run_server(debug=True)

The code for filtering data:

from dash import Dash, dash_table, dcc, html
from dash.dependencies import Input, Output
import pandas as pd

df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/gapminder2007.csv'
)
# Add an `id` column and use it as the index while keeping it as a column.
# Here the unique ID is simply the country name, so `country` could have
# been renamed to `id` instead; it is duplicated to show the general pattern.
df = df.assign(id=df['country']).set_index('id', drop=False)

app = Dash(__name__)

# Page layout: an interactive table and a container for the derived graphs.
app.layout = html.Div(children=[
    dash_table.DataTable(
        id='datatable-row-ids',
        # One deletable column per DataFrame column, omitting the id column.
        columns=[
            dict(name=col, id=col, deletable=True)
            for col in df.columns
            if col != 'id'
        ],
        data=df.to_dict('records'),
        editable=True,
        filter_action="native",
        sort_action="native",
        sort_mode='multi',
        row_selectable='multi',
        row_deletable=True,
        selected_rows=[],
        page_action='native',
        page_current=0,
        page_size=10,
    ),
    html.Div(id='datatable-row-ids-container'),
])


@app.callback(
    Output('datatable-row-ids-container', 'children'),
    Input('datatable-row-ids', 'derived_virtual_row_ids'),
    Input('datatable-row-ids', 'selected_row_ids'),
    Input('datatable-row-ids', 'active_cell'))
def update_graphs(row_ids, selected_row_ids, active_cell):
    """Build one bar chart per metric for the rows currently in the table.

    Parameters
    ----------
    row_ids : list or None
        Row ids used to select rows from the module-level ``df``. ``None``
        means the whole ``df`` is used (Dash passes ``None`` for unsupplied
        properties when the component is first rendered).
    selected_row_ids : list or None
        Ids of the selected rows; ``None`` is treated as no selection.
    active_cell : dict or None
        The active cell; only its ``'row_id'`` entry is read.

    Returns
    -------
    list of dcc.Graph
        One graph for each of ``pop``, ``lifeExp`` and ``gdpPercap`` that is
        still present in the data.
    """
    selected = set(selected_row_ids or [])

    if row_ids is not None:
        dff = df.loc[row_ids]
    else:
        dff = df
        # A pandas Series iterates like a list, which is all that is needed.
        row_ids = df['id']

    active_row_id = None
    if active_cell:
        active_row_id = active_cell['row_id']

    def bar_colour(row_id):
        # The active row takes precedence over selected rows, which take
        # precedence over the default colour.
        if row_id == active_row_id:
            return '#FF69B4'
        if row_id in selected:
            return '#7FDBFF'
        return '#0074D9'

    bar_colours = [bar_colour(row_id) for row_id in row_ids]

    graphs = []
    for column in ['pop', 'lifeExp', 'gdpPercap']:
        # The user may have deleted the column from the table; this check
        # is unnecessary for columns that are not deletable.
        if column not in dff:
            continue
        trace = {
            'x': dff['country'],
            'y': dff[column],
            'type': 'bar',
            'marker': {'color': bar_colours},
        }
        layout = {
            'xaxis': {'automargin': True},
            'yaxis': {
                'automargin': True,
                'title': {'text': column}
            },
            'height': 250,
            'margin': {'t': 10, 'l': 10, 'r': 10},
        }
        graphs.append(
            dcc.Graph(
                id=column + '--row-ids',
                figure={'data': [trace], 'layout': layout},
            )
        )
    return graphs


# Launch the app in debug mode when this file is run as a script.
if __name__ == "__main__":
    app.run_server(debug=True)

When combining these codes, I get the following error message: NameError: name ‘df’ is not defined.
This happens because, in the first script, the DataFrame (df) only exists as the return value of the parse_data function, whereas the second script defines df explicitly at module level. That makes sense: the first script lets the user upload a file after the app has started, which is not the case for the second script.

What can I do to solve this problem?
Thank you very much!

What is the Dash environment, and how is it relevant to the error you are getting?

Please don’t dump dozens or hundreds of lines of code in our lap and expect us to debug it for you, we’re not being paid to do this, we are volunteers. If you don’t help us to help you, you probably won’t get much help.

At the very least you should simplify your code to the minimum amount that reproduces the error.

Please read:

If you have two scripts, and the first works correctly, we probably don’t need to see the working one.

Please:

  • Explain how you invoke the two scripts. Are you using the command line or some other environment?
  • If using the command line, please copy the command(s) you give.
  • Please copy the full output, starting from the line “Traceback…” and including the final error message.

Paste the output as text, not a picture, in your reply. Put it between code fences (three backticks) like this:

```
paste your output here
```