Skip to content
Snippets Groups Projects

Draft: CSV export

Open Jonathan Weth requested to merge csv-export into master
1 file
+ 1
0
Compare changes
  • Side-by-side
  • Inline
import logging
import os
import shutil
from glob import glob
from tempfile import TemporaryDirectory
from typing import Optional
@@ -9,6 +10,7 @@ from django.conf import settings
from django.contrib import messages
from django.contrib.humanize.templatetags.humanize import apnumber
from django.core.exceptions import ValidationError
from django.core.files import File
from django.db import transaction
from django.template import Context, Template
from django.utils.translation import gettext as _
@@ -21,6 +23,7 @@ from tqdm import tqdm
from aleksis.apps.csv_import.field_types import (
ConnectedMatchFieldType,
DirectMappingFieldType,
IgnoreFieldType,
MatchFieldType,
ProcessFieldType,
)
@@ -29,7 +32,7 @@ from aleksis.core.models import Group, Person
from aleksis.core.util.celery_progress import ProgressRecorder
from aleksis.core.util.core_helpers import process_custom_context_processors
from ..models import ImportJob
from ..models import ExportJob, ImportJob
@transaction.atomic
@@ -187,7 +190,7 @@ def import_csv(
filter(lambda f: isinstance(f[1], MatchFieldType), field_types.items()),
key=lambda f: f[1].get_priority(),
):
if column_name in row:
if column_name in row and row[column_name]:
match_options.setdefault(field_type.get_priority(), {})
match_options[field_type.get_priority()][field_type.get_db_field()] = (
row[column_name]
@@ -332,3 +335,178 @@ def import_csv(
if recorder:
recorder.add_message(messages.WARNING, msg)
logging.warning(msg)
def export_csv(
    export_job: ExportJob,
    recorder: Optional[ProgressRecorder] = None,
) -> None:
    """Export one CSV/ZIP file from a job.

    All objects of the model referenced by the job's template are read
    (restricted to the job's school term if the model has a ``school_term``
    attribute and the job defines one), converted column by column using the
    template's field types and written to ``export.csv`` in a temporary
    directory. If the output directory ends up containing more than one file,
    the whole directory is packed into ``export.zip`` instead. The resulting
    file is stored in ``export_job.data_file``.

    Args:
        export_job: The job describing what to export (template, school term)
            and receiving the generated file.
        recorder: Optional progress recorder. If given, progress and messages
            are reported through it; otherwise ``tqdm`` and ``logging`` are used.

    Returns:
        None. If writing the CSV file fails, the error is reported and the
        function returns early without saving a file or emitting the final
        summary message.
    """
    all_ok = True

    # We work in a temporary directory locally to allow more export scenarios,
    # like ZIP files that need to be preprocessed
    with TemporaryDirectory() as temp_dir:
        # Create output directory
        output_dir = os.path.join(temp_dir, "output")
        os.mkdir(output_dir)

        # Get the job and the source of the export
        template = export_job.template
        model = template.content_type.model_class()
        school_term = export_job.school_term

        # Dissect template definition: map each column name to its field type
        field_types = {}
        for field in template.fields.all():
            # Get field type and prepare for export
            field_type = field.field_type_class(school_term, output_dir, **field.args)
            column_name = field_type.get_column_name()
            field_types[column_name] = field_type

        data = []

        # Go through all data rows
        qs = model.objects.all()
        if hasattr(model, "school_term") and school_term:
            qs = qs.filter(school_term=school_term)
        qs_list = list(qs)

        iterator = recorder.iterate(qs_list) if recorder else tqdm(qs_list)
        for item in iterator:
            try:
                item_data = {}
                for column_name, field_type in field_types.items():
                    if isinstance(field_type, ProcessFieldType):
                        # Field types with custom export logic
                        try:
                            result = field_type.process_export(item)
                            item_data[column_name] = result
                        except (RuntimeError, IndexError) as e:
                            if recorder:
                                recorder.add_message(messages.ERROR, str(e))
                            else:
                                logging.error(str(e))
                    else:
                        # Field types backed by a DB field
                        db_field = field_type.get_db_field()
                        if db_field:
                            value = getattr(item, db_field)

                            # Apply converters
                            converter = field_type.get_export_converter()
                            if converter:
                                value = converter(value)

                            # Only use the value inside this branch, so a field
                            # without DB field can never pick up the value of a
                            # previously processed column
                            if isinstance(field_type, (DirectMappingFieldType, MatchFieldType)):
                                item_data[column_name] = value

                        if isinstance(field_type, IgnoreFieldType):
                            # Ignored columns are kept in the layout, but stay empty
                            item_data[column_name] = ""

                data.append(item_data)
            except (
                ValueError,
                ValidationError,
                model.MultipleObjectsReturned,
                model.DoesNotExist,
            ) as e:
                msg = _("Failed to export {verbose_name} {item}:\n{e}").format(
                    verbose_name=model._meta.verbose_name, item=item, e=e
                )
                if recorder:
                    recorder.add_message(
                        messages.ERROR,
                        msg,
                    )
                else:
                    logging.error(msg)
                all_ok = False

        # TODO: Template-rendered fields (``get_template``) and alternative DB
        # fields (``get_alternative_db_fields``) are not handled on export yet.

        csv_name = "export.csv"
        csv_path = os.path.join(output_dir, csv_name)

        try:
            # Pin the columns to the template's column order: this keeps the
            # layout stable if a value is missing in some row and still
            # provides the columns if there are no rows at all
            pandas_data = pandas.DataFrame(data, columns=list(field_types))
            pandas_data.to_csv(
                csv_path,
                sep=template.parsed_separator,
                header=template.has_header_row,
                index=template.has_index_col,
                quotechar=template.parsed_quotechar,
                encoding="utf8",
            )
        except (OSError, ValueError) as e:
            # Writing fails with I/O or encoding errors rather than with parser
            # errors; pandas' ParserError is a ValueError and thus still caught
            msg = _("There was an error while writing the CSV file:\n{}").format(e)
            if recorder:
                recorder.add_message(messages.ERROR, msg)
            else:
                logging.error(msg)
            return

        # Field types may have placed additional files in the output directory;
        # in this case, deliver everything as one ZIP archive
        files_count = len(os.listdir(output_dir))
        if files_count > 1:
            file_path = os.path.join(temp_dir, "export.zip")
            shutil.make_archive(os.path.join(temp_dir, "export"), "zip", output_dir)
        else:
            file_path = csv_path

        # Save export file to export job
        with open(file_path, "rb") as f:
            export_job.data_file.save(os.path.basename(file_path), File(f))
        export_job.save()

        if all_ok:
            msg = _("All {verbose_name} were exported successfully.").format(
                verbose_name=model._meta.verbose_name_plural
            )
            if recorder:
                recorder.add_message(messages.SUCCESS, msg)
            else:
                logging.info(msg)
        else:
            msg = _("The export of {verbose_name} failed.").format(
                verbose_name=model._meta.verbose_name_plural
            )
            if recorder:
                recorder.add_message(messages.ERROR, msg)
            else:
                logging.warning(msg)
Loading