-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
294 lines (238 loc) · 12.7 KB
/
app.py
File metadata and controls
294 lines (238 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import glob
import logging
import os
import tempfile
import traceback
from pathlib import Path

from flask import Flask, render_template, request, jsonify, send_file

from utils.data_processor import process_csv_data
# Set up logging
# NOTE(review): DEBUG is verbose for production — presumably intentional for development; confirm.
logging.basicConfig(level=logging.DEBUG)
# Initialize Flask app
app = Flask(__name__)
# Secret key comes from the environment; the hard-coded fallback is for local development only.
app.secret_key = os.environ.get("SESSION_SECRET", "default_secret_key_for_development")
# Configure Flask for file uploads
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB max upload size
@app.route('/')
def index():
    """Serve the application's main page."""
    page = render_template('index.html')
    return page
@app.route('/process_csv', methods=['POST'])
def process_csv():
    """Process an uploaded CSV file and return the processed data as JSON.

    Expects a multipart upload under the 'file' form key. The bytes are
    decoded with a series of candidate encodings and processed in memory
    first; if that fails, the upload is written to a temporary file and
    re-processed through process_csv_data()'s file-path code paths.

    Returns:
        A JSON response with the processed data on success, or a JSON
        error payload with a 400/500 status code on failure.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"error": "No file part"}), 400
        file = request.files['file']
        if file.filename == '':
            return jsonify({"error": "No selected file"}), 400
        # Accept more file extensions to improve compatibility; extensionless
        # names are allowed through as well.
        valid_extensions = ('.csv', '.txt', '.data', '.dat', '.tsv', '.tab', '.log', '.xls', '.xlsx')
        if not (file.filename.lower().endswith(valid_extensions) or '.' not in file.filename):
            return jsonify({"error": f"File type not supported. Please upload a supported file type ({', '.join(valid_extensions)})."}), 400

        temp_file_path = None
        try:
            logging.info(f"Processing file {file.filename}")
            file_content = file.read()
            decoded_content = _decode_upload(file_content, file.filename)

            # First attempt: process the decoded text in memory.
            processed_data = process_csv_data(decoded_content)
            if 'error' in processed_data:
                logging.error(f"Error in process_csv_data: {processed_data['error']}")
                if 'message' in processed_data:
                    logging.error(f"Error message: {processed_data['message']}")
                # Second attempt: spill the raw upload to disk and use the
                # file-path code path, which is more robust for large files.
                logging.info(f"Trying alternative processing approach for {file.filename}")
                temp_file_path = _save_upload_to_temp(file)
                processed_data = process_csv_data(temp_file_path, use_file_path=True)
            else:
                logging.info(f"Successfully processed file {file.filename} with {processed_data.get('metadata', {}).get('points_count', 0)} points")
            return jsonify(processed_data)
        except Exception as e:
            logging.error(f"Error processing file: {str(e)}")
            logging.error(traceback.format_exc())
            # Final attempt: force the large-file path via a temp file.
            try:
                if temp_file_path is None:
                    temp_file_path = _save_upload_to_temp(file)
                logging.info(f"Using final fallback method for {file.filename}")
                processed_data = process_csv_data(temp_file_path, use_file_path=True, is_large_file=True)
                if 'error' not in processed_data:
                    logging.info(f"Successfully processed file {file.filename} with fallback method")
                    return jsonify(processed_data)
                return jsonify({"error": f"Failed to process file after multiple attempts: {processed_data.get('message', 'Unknown error')}"}), 400
            except Exception as fallback_error:
                logging.error(f"Fallback processing failed: {str(fallback_error)}")
                return jsonify({"error": f"Error processing file: {str(e)}. Fallback also failed: {str(fallback_error)}"}), 500
        finally:
            # Single cleanup point for the temp file (the original duplicated
            # this removal in two nested finally blocks with bare excepts).
            if temp_file_path and os.path.exists(temp_file_path):
                try:
                    os.remove(temp_file_path)
                except OSError:
                    pass
    except Exception as e:
        logging.error(f"Unexpected error in process_csv route: {str(e)}")
        logging.error(traceback.format_exc())
        return jsonify({"error": f"Server error: {str(e)}"}), 500


def _decode_upload(file_content, filename):
    """Decode raw upload bytes, trying common encodings in order of likelihood."""
    for encoding in ('utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'utf-16'):
        try:
            decoded = file_content.decode(encoding)
            logging.info(f"Successfully decoded file {filename} with {encoding} encoding")
            return decoded
        except UnicodeDecodeError:
            continue
    # latin-1 with errors='replace' cannot fail, so this always yields text.
    logging.warning(f"All encodings failed for {filename}, using latin-1 as binary fallback")
    return file_content.decode('latin-1', errors='replace')


def _save_upload_to_temp(file):
    """Rewind the uploaded file and write its contents to a temporary .csv file.

    Returns the temp file's path; the caller is responsible for removing it.
    """
    temp_fd, temp_path = tempfile.mkstemp(suffix='.csv')
    os.close(temp_fd)
    with open(temp_path, 'wb') as f:
        file.seek(0)
        f.write(file.read())
    return temp_path
@app.route('/browse_data', methods=['GET'])
def browse_data():
    """Browse available flight data files on the server.

    Query params:
        folder: subfolder of 'data' to list (default 'sample_data').
        page, page_size: pagination; page_size may be 'all' to disable it.
        search: optional case-insensitive substring filter on filenames.

    Returns:
        JSON with the folder list, the (paginated) file list, and
        pagination metadata; a 400 for invalid input, 500 on server error.
    """
    try:
        folder = request.args.get('folder', 'sample_data')
        # Validate folder to prevent directory traversal.
        if '..' in folder or folder.startswith('/') or ':' in folder:
            return jsonify({"error": "Invalid folder path"}), 400

        # Pagination parameters. Fix: malformed values used to raise
        # ValueError and surface as a 500; reject them with a 400 instead,
        # and refuse non-positive values (page < 1 produced a negative
        # slice start).
        try:
            page = int(request.args.get('page', 1))
            page_size_param = request.args.get('page_size', '1000')
            if page_size_param.lower() == 'all':
                # Effectively disables pagination.
                page_size = 100000
            else:
                page_size = int(page_size_param)
        except ValueError:
            return jsonify({"error": "Invalid pagination parameters"}), 400
        if page < 1 or page_size < 1:
            return jsonify({"error": "Invalid pagination parameters"}), 400

        search_query = request.args.get('search', '').lower()

        # Ensure the target folder exists (creates 'data' itself on first use).
        data_dir = os.path.join('data', folder)
        os.makedirs(data_dir, exist_ok=True)

        available_folders = [item for item in os.listdir('data')
                             if os.path.isdir(os.path.join('data', item))]

        all_files = []
        for file_path in glob.glob(os.path.join(data_dir, '*.*')):
            filename = os.path.basename(file_path)
            # Only include CSV and related files; filter before stat-ing
            # the file to avoid needless syscalls.
            if not filename.lower().endswith(('.csv', '.txt', '.data', '.dat', '.tsv', '.tab')):
                continue
            if search_query and search_query not in filename.lower():
                continue
            size = os.path.getsize(file_path)
            modified = os.path.getmtime(file_path)
            all_files.append({
                'name': filename,
                'size': size,
                'size_formatted': format_file_size(size),
                'modified': modified,
                'path': os.path.join(folder, filename)
            })

        # Sort files by modified date (newest first).
        all_files.sort(key=lambda x: x['modified'], reverse=True)

        total_files = len(all_files)
        total_pages = (total_files + page_size - 1) // page_size  # ceiling division
        start_idx = (page - 1) * page_size
        paginated_files = all_files[start_idx:start_idx + page_size]

        return jsonify({
            "current_folder": folder,
            "folders": available_folders,
            "files": paginated_files,
            "pagination": {
                "page": page,
                "page_size": page_size,
                "total_pages": total_pages,
                "total_files": total_files
            }
        })
    except Exception as e:
        logging.error(f"Error in browse_data: {str(e)}")
        logging.error(traceback.format_exc())
        return jsonify({"error": f"Server error: {str(e)}"}), 500
@app.route('/get_server_file', methods=['GET'])
def get_server_file():
    """Fetch a file from the server's data directory and process it.

    Query params:
        path: path relative to 'data', e.g. 'sample_data/flight1.csv'.

    Returns:
        JSON with the processed data, or an error payload (400/404/500).
    """
    try:
        file_path = request.args.get('path')
        if not file_path:
            return jsonify({"error": "No file path specified"}), 400
        # The path must include a folder component.
        folder = file_path.split('/')[0] if '/' in file_path else None
        if not folder:
            return jsonify({"error": "Invalid file path format"}), 400
        # Security fix: the original screened only the FIRST component for
        # traversal, so 'sample_data/../../secret.csv' slipped through.
        # Screen the full relative path, then confirm the resolved location
        # really lives under the data directory.
        if '..' in file_path or file_path.startswith('/') or ':' in file_path:
            return jsonify({"error": "Invalid folder path"}), 400
        full_path = os.path.join('data', file_path)
        data_root = os.path.realpath('data')
        if not os.path.realpath(full_path).startswith(data_root + os.sep):
            return jsonify({"error": "Invalid folder path"}), 400

        if not os.path.exists(full_path) or not os.path.isfile(full_path):
            return jsonify({"error": "File not found"}), 404
        if not full_path.lower().endswith(('.csv', '.txt', '.data', '.dat', '.tsv', '.tab')):
            return jsonify({"error": "File type not supported"}), 400

        # Process the file via the file-path code path.
        try:
            processed_data = process_csv_data(full_path, use_file_path=True)
            if 'error' in processed_data:
                return jsonify({"error": processed_data.get('message', 'Failed to process file')}), 400
            return jsonify(processed_data)
        except Exception as proc_error:
            logging.error(f"Error processing file: {str(proc_error)}")
            logging.error(traceback.format_exc())
            return jsonify({"error": f"Error processing file: {str(proc_error)}"}), 500
    except Exception as e:
        logging.error(f"Error in get_server_file: {str(e)}")
        logging.error(traceback.format_exc())
        return jsonify({"error": f"Server error: {str(e)}"}), 500
def format_file_size(size_bytes):
    """Return *size_bytes* as a human-readable string, e.g. '1.5 KB'.

    Scales through B, KB, MB and GB in steps of 1024; anything larger
    is expressed in TB.
    """
    labels = ["B", "KB", "MB", "GB", "TB"]
    value = float(size_bytes)
    idx = 0
    # Divide down until the value fits the unit or we run out of labels.
    while value >= 1024.0 and idx < len(labels) - 1:
        value /= 1024.0
        idx += 1
    return f"{value:.1f} {labels[idx]}"
if __name__ == "__main__":
    # Development server only: binds all interfaces with debug enabled.
    # Use a production WSGI server (e.g. gunicorn) for deployment.
    app.run(host="0.0.0.0", port=5000, debug=True)