Friday, February 25, 2011

Streaming a csv file

I've tried a couple of different methods for streaming a large csv
file (it takes a few minutes to download entirely). The most recent
method I pulled from here:
http://stackoverflow.com/questions/2922874/how-to-stream-an-httpresponse-with-django

I turned off the GZipMiddleware and added the
@condition(etag_func=None) decorator to the view.

get_row_data() is generating output (I can see it when I raise an error
within the generator), but when the response is sent, its body is empty.

If you prefer to view it in a paste, here it is: http://dpaste.com/hold/449407/

import csv, cStringIO, time
from django.db import models
from django.db.models.fields.related import RelatedField
from django.http import HttpResponse
from django.views.decorators.http import condition

@condition(etag_func=None)
def csv_view(request, app_label, model_name):
    """Based on the filters in the query, return a csv file for the given model.

    The model is looked up from ``app_label`` and ``model_name``.  On a GET
    request every query-string parameter is passed to ``filter()`` as a
    field lookup; for any other method, or when there are no parameters,
    all of the model's rows are returned.  The body of the response is
    produced lazily by ``stream_response_generator``.

    Raises ``Http404`` when no model matches ``app_label``/``model_name``.
    """
    # Imported here so this function does not depend on edits to the
    # module's import block.
    from django.http import Http404

    # Get the model.  get_model() returns None for an unknown app/model,
    # which would otherwise surface as an AttributeError on `.objects`.
    model = models.get_model(app_label, model_name)
    if model is None:
        raise Http404("No model %s.%s" % (app_label, model_name))

    # Collect the query filters.
    # NOTE(review): the lookups come straight from the query string, so a
    # client can filter on any field or relation of the model -- confirm
    # that is acceptable for every model this view can reach.
    keyword_arg_dict = {}
    if request.method == 'GET':
        for key, value in request.GET.items():
            # Keyword names are coerced with str(); values are passed through
            # unchanged because str() fails on non-ASCII unicode text.
            keyword_arg_dict[str(key)] = value

    if keyword_arg_dict:
        # generate the row objects, based on the filters
        objects_list = model.objects.filter(**keyword_arg_dict)
    else:
        # get all the model's objects
        objects_list = model.objects.all()

    # create the response object; the generator is handed over unconsumed so
    # the rows can be produced while the response is being sent
    response = HttpResponse(
        stream_response_generator(model, objects_list),
        mimetype='text/plain',
    )
    # Create the csv filename
    filename = "%s_%s.csv" % (app_label, model_name)
    # Set the response as an attachment with a filename (currently disabled,
    # so `filename` is unused and the data is served as plain text)
    #response['Content-Disposition'] = "attachment; filename=%s" % (filename)
    return response

def stream_response_generator(model, objects_list, delay=1):
    """Streaming function to return data iteratively.

    Yields the csv header line for ``model`` first, then one csv line for
    each item of ``objects_list``.

    ``delay`` is the number of seconds to sleep after each row.  It defaults
    to 1, which matches the pause that used to be hard-coded here; pass 0 to
    emit rows without pausing.
    """
    yield get_field_headers(model)
    for row_item in objects_list:
        yield get_row_data(model, row_item)
        if delay:
            time.sleep(delay)

def get_row_data(model, row):
    """Get a row of csv data from an object.

    One column is written per entry of ``model._meta.fields``, in that
    order:

    * a related field is written as the UTF-8 encoded unicode form of the
      object it points to, or as an empty column when the relation is unset
      or the related object cannot be found;
    * any other field is written as its stored value, with unicode text
      encoded to UTF-8.

    Returns the csv line (including the line terminator) as a string.
    """
    value_list = []
    for field in model._meta.fields:
        if isinstance(field, RelatedField):
            related_model = field.rel.to
            # Read the related object through the field's own attribute
            # rather than guessing which instance attribute holds its id.
            try:
                entry = getattr(row, field.name)
            except related_model.DoesNotExist:
                # Dangling or missing reference: write an empty column
                # instead of reusing the previous field's value.
                entry = None
            if entry is None:
                value = ""
            else:
                value = unicode(entry).encode("utf-8")
        else:
            # attname is the instance attribute that stores the field value
            value = getattr(row, field.attname)
            # Only unicode text needs encoding; calling encode() on a byte
            # string would first decode it as ASCII and can fail.
            if isinstance(value, unicode):
                value = value.encode("utf-8")
        value_list.append(value)

    # Write the values through the csv module so quoting is handled, then
    # hand back the text of that single line.
    csv_handle = cStringIO.StringIO()
    csv.writer(csv_handle).writerow(value_list)
    return csv_handle.getvalue()

def get_field_headers(model):
    """Get the headers of the model's csv.

    Returns a single csv line (including the line terminator) holding the
    ``name`` of every field in ``model._meta.fields``, in that order.
    """
    # gather all the field names, in the order the model's meta lists them
    field_names = [field.name for field in model._meta.fields]
    # write them as one csv row into a temporary in-memory handle
    csv_handle = cStringIO.StringIO()
    csv.writer(csv_handle).writerow(field_names)
    # Return the string value of the csv output
    return csv_handle.getvalue()

--
You received this message because you are subscribed to the Google Groups "Django users" group.
To post to this group, send email to django-users@googlegroups.com.
To unsubscribe from this group, send email to django-users+unsubscribe@googlegroups.com.
For more options, visit this group at http://groups.google.com/group/django-users?hl=en.

No comments:

Post a Comment