I am looking to create a tool in Dash that lets me upload a data file, filter/select the data I want, and plot the selected data. To do this, I tried to combine two code examples from the documentation: one for uploading and plotting data, and one for filtering data.
The code for uploading and plotting data:
import base64
import datetime
import io
import sys
import plotly.graph_objs as go
import cufflinks as cf
import dash
from dash.dependencies import Input, Output, State
from dash import dcc
from dash import html
from dash import dash_table
import pandas as pd
# Stylesheet URL handed to Dash for the page's base styling.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# The Dash application object; `server` re-exports its `server` attribute
# (presumably for a WSGI host to pick up -- confirm against deployment).
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
server = app.server

# Colour palette; the figure callback reads "graphBackground" from it.
colors = dict(
    graphBackground="#F5F5F5",
    background="#ffffff",
    text="#000000",
)
# Page layout: an upload drop zone, the graph, and a container that the
# table callback fills in.
app.layout = html.Div(children=[
    dcc.Upload(
        id='upload-data',
        children=html.Div(children=['Drag and Drop or ', html.A('Select Files')]),
        style=dict(
            width='100%',
            height='60px',
            lineHeight='60px',
            borderWidth='1px',
            borderStyle='dashed',
            borderRadius='5px',
            textAlign='center',
            margin='10px',
        ),
        # Allow multiple files to be uploaded
        multiple=True,
    ),
    dcc.Graph(id='Mygraph'),
    html.Div(id='output-data-upload'),
])
def parse_data(contents, filename):
    """Decode an uploaded file into a pandas DataFrame.

    The parser is chosen from the file extension (case-insensitive):
    ``.csv`` is read as comma-separated text, ``.xls``/``.xlsx``/``.xlsm`` as
    an Excel workbook, and ``.txt``/``.tsv`` as whitespace-delimited text.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; only its extension is inspected.

    Returns:
        The parsed DataFrame, or an ``html.Div`` carrying an error message
        when the extension is unsupported or the file cannot be decoded or
        parsed. Callers must check which of the two they received.
    """
    name = filename.lower()
    try:
        # Everything after the first comma is the base64 payload.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        if name.endswith('.csv'):
            # Comma-separated text.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif name.endswith(('.xls', '.xlsx', '.xlsm')):
            # Excel workbook.
            df = pd.read_excel(io.BytesIO(decoded))
        elif name.endswith(('.txt', '.tsv')):
            # Whitespace-delimited text (any run of spaces or tabs).
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')), delimiter=r'\s+')
        else:
            # Previously `'txt' or 'tsv' in filename` was always true, so any
            # file type silently fell into the whitespace-delimited parser.
            raise ValueError('Unsupported file type: {!r}'.format(filename))
    except Exception as e:
        # Boundary handler: report the problem and hand back a displayable
        # message instead of letting the callback fail.
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])
    return df
@app.callback(
    Output('Mygraph', 'figure'),
    [
        Input('upload-data', 'contents'),
        Input('upload-data', 'filename'),
    ],
)
def update_graph(contents, filename):
    """Plot ``y_coordinate`` against ``time`` for the first uploaded file.

    Args:
        contents: List of upload payloads from the upload component, or a
            falsy value when nothing has been uploaded.
        filename: List of file names matching ``contents``.

    Returns:
        A ``go.Figure`` with one lines+markers trace. The trace is empty when
        nothing was uploaded, the file could not be parsed, or the expected
        columns are missing.
    """
    t = []
    y = []
    if contents:
        # The upload component is configured with multiple=True, so both
        # arguments are lists; only the first file is plotted.
        df = parse_data(contents[0], filename[0])
        # parse_data returns an html.Div instead of a DataFrame on failure;
        # fall back to the empty trace rather than raising in the callback.
        if isinstance(df, pd.DataFrame) and {'time', 'y_coordinate'} <= set(df.columns):
            t = df['time']
            y = df['y_coordinate']
    fig = go.Figure(
        data=[
            go.Scatter(
                x=t,
                y=y,
                mode='lines+markers')
        ],
        layout=go.Layout(
            plot_bgcolor=colors["graphBackground"],
            paper_bgcolor=colors["graphBackground"]
        ))
    return fig
@app.callback(
    Output('output-data-upload', 'children'),
    [
        Input('upload-data', 'contents'),
        Input('upload-data', 'filename'),
    ],
)
def update_table(contents, filename):
    """Render the first uploaded file as a table with a raw-content preview.

    Args:
        contents: List of upload payloads from the upload component, or a
            falsy value when nothing has been uploaded.
        filename: List of file names matching ``contents``.

    Returns:
        An empty ``html.Div`` when nothing was uploaded, the error component
        produced by ``parse_data`` when parsing failed, or otherwise a
        ``html.Div`` holding the file name, a ``DataTable`` of the parsed
        rows, and the first 200 characters of the raw payload.
    """
    if not contents:
        return html.Div()

    # Both arguments are lists (multiple=True); only the first file is shown.
    first_contents = contents[0]
    first_name = filename[0]
    df = parse_data(first_contents, first_name)
    if not isinstance(df, pd.DataFrame):
        # parse_data hands back a displayable error component on failure.
        return df

    return html.Div([
        html.H5(first_name),
        dash_table.DataTable(
            # 'records' is the documented orient; the old 'rows' spelling is
            # not a valid value in current pandas.
            data=df.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in df.columns]
        ),
        html.Hr(),
        html.Div('Raw Content'),
        html.Pre(first_contents[0:200] + '...', style={
            'whiteSpace': 'pre-wrap',
            'wordBreak': 'break-all'
        })
    ])
# Start the app with debug enabled, but only when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    app.run_server(debug=True)
The code for filtering data:
from dash import Dash, dash_table, dcc, html
from dash.dependencies import Input, Output
import pandas as pd
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/gapminder2007.csv'
)
# Give every row an 'id' and index the frame by it, keeping 'id' as a regular
# column too. Here the unique ID is simply the country name, so renaming
# 'country' to 'id' would also have worked; it is duplicated to show the more
# general pattern.
df = df.assign(id=df['country']).set_index('id', drop=False)
app = Dash(__name__)

# Layout: an interactive table over `df`, followed by the container that the
# graph callback populates.
app.layout = html.Div(children=[
    dash_table.DataTable(
        id='datatable-row-ids',
        # One deletable column per DataFrame column, leaving out 'id'.
        columns=[
            dict(name=col, id=col, deletable=True)
            for col in df.columns
            if col != 'id'
        ],
        data=df.to_dict('records'),
        editable=True,
        row_deletable=True,
        row_selectable='multi',
        selected_rows=[],
        filter_action="native",
        sort_action="native",
        sort_mode='multi',
        page_action='native',
        page_current=0,
        page_size=10,
    ),
    html.Div(id='datatable-row-ids-container'),
])
@app.callback(
    Output('datatable-row-ids-container', 'children'),
    Input('datatable-row-ids', 'derived_virtual_row_ids'),
    Input('datatable-row-ids', 'selected_row_ids'),
    Input('datatable-row-ids', 'active_cell'))
def update_graphs(row_ids, selected_row_ids, active_cell):
    """Build one bar chart per metric column for the rows shown in the table.

    Args:
        row_ids: Value of the table's ``derived_virtual_row_ids`` property,
            or ``None``. Per the original note, unsupplied properties are
            ``None`` when the component is first rendered; in that case the
            whole DataFrame is used.
        selected_row_ids: Value of ``selected_row_ids``; may be ``None``.
        active_cell: Value of ``active_cell``; when truthy its ``'row_id'``
            entry identifies the active row.

    Returns:
        A list of ``dcc.Graph`` components, one for each of ``'pop'``,
        ``'lifeExp'`` and ``'gdpPercap'`` that is present in the frame.
    """
    chosen = set(selected_row_ids or [])

    if row_ids is None:
        visible = df
        # A pandas Series iterates like a list, which is all that is needed.
        row_ids = df['id']
    else:
        visible = df.loc[row_ids]

    if active_cell:
        active_row_id = active_cell['row_id']
    else:
        active_row_id = None

    def bar_color(row_id):
        # The active row takes precedence over selected rows.
        if row_id == active_row_id:
            return '#FF69B4'
        if row_id in chosen:
            return '#7FDBFF'
        return '#0074D9'

    colors = [bar_color(row_id) for row_id in row_ids]

    graphs = []
    for column in ['pop', 'lifeExp', 'gdpPercap']:
        # Skip metrics that are not available in the frame.
        if column not in visible:
            continue
        figure = {
            'data': [
                {
                    'x': visible['country'],
                    'y': visible[column],
                    'type': 'bar',
                    'marker': {'color': colors},
                }
            ],
            'layout': {
                'xaxis': {'automargin': True},
                'yaxis': {
                    'automargin': True,
                    'title': {'text': column}
                },
                'height': 250,
                'margin': {'t': 10, 'l': 10, 'r': 10},
            },
        }
        graphs.append(dcc.Graph(id=column + '--row-ids', figure=figure))
    return graphs
# Start the app with debug enabled, but only when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    app.run_server(debug=True)
When I combine these two pieces of code, I get the following error message: `NameError: name 'df' is not defined`.
This is because in the first piece of code the DataFrame (df) is only created and returned inside a function, while in the second piece of code it is defined explicitly at module level. This is not surprising, since the first code allows the user to upload a file after the app has started, which is not the case for the second code.
What can I do to solve this problem?
Thank you very much!