# NOTE: `trans` is this file's (unusual) alias for the standard `sys` module;
# it is kept so the module-level name stays the same.
import sys as trans
import csv
import pprint


def _sniff_dialect(file):
    """Guess the CSV dialect of an open text file, then rewind it.

    The guess is based on the first 1024 characters. If the sniffer cannot
    determine a delimiter (for example because the file is empty), the
    standard ``csv.excel`` dialect is used instead of letting ``csv.Error``
    propagate.
    """
    try:
        dialect = csv.Sniffer().sniff(file.read(1024))
    except csv.Error:
        dialect = csv.excel
    # rewind the file to its start so the reader sees the header row
    file.seek(0)
    return dialect


def _read_attendance(filename):
    """Parse the attendance CSV into ``(participants, events)`` dicts.

    ``participants`` maps participant ID -> ``{'name', 'events'}`` and
    ``events`` maps event name -> ``{'type', 'participants'}``. Participant
    IDs and event names are assumed to be unique.

    Raises:
        KeyError: if one of the expected columns is absent from the header.
        OSError: if the file cannot be opened.
    """
    participants = {}
    events = {}

    # the docs say we must use newline='' if passing in a file-like object
    with open(filename, newline='') as file:
        reader = csv.DictReader(file, dialect=_sniff_dialect(file))

        for row in reader:
            # grab all the data from the row up front. this keeps the code
            # below less verbose and fails fast (KeyError) when a column is
            # missing from the header. note that a row shorter than the
            # header does not fail: DictReader fills its missing fields
            # with None.
            participant_id = row['Participant ID']
            participant_name = row['Participant Name']
            event_name = row['Event Name']
            event_type = row['Event Type']

            # if this participant has not yet been seen, record them
            if participant_id not in participants:
                participants[participant_id] = {
                    'name': participant_name,
                    'events': [],
                }

            # if this event has not yet been seen, record it
            # note that we trust the event type from the first occurrence
            if event_name not in events:
                events[event_name] = {
                    'type': event_type,
                    'participants': [],
                }

            # record this attendance on both the participant and the event
            participants[participant_id]['events'].append(event_name)
            events[event_name]['participants'].append(participant_name)

    return participants, events


def _print_report(participants, events):
    """Print the raw parsed data followed by per-attendee and per-event counts."""
    # print all the data
    pprint.pp(participants)
    pprint.pp(events)

    print()

    # print attendee stats
    for pid, participant in participants.items():
        name = participant['name']
        num_events = len(participant['events'])
        print(f"{name} ({pid}) attended {num_events} events")

    print()

    # print attendance stats
    for event_name, event in events.items():
        num_participants = len(event['participants'])
        print(f"{event_name} had {num_participants} attendees")


def main():
    """Read the CSV named on the command line and print attendance stats.

    Expects exactly one command-line argument: the path of a CSV file with
    the columns 'Participant ID', 'Participant Name', 'Event Name' and
    'Event Type'. Exits with status 1 (message on stderr) when the argument
    count is wrong.
    """
    if len(trans.argv) != 2:
        print("Requires exactly one argument: the CSV to process.",
              file=trans.stderr)
        trans.exit(1)

    participants, events = _read_attendance(trans.argv[1])
    _print_report(participants, events)


if __name__ == "__main__":
    main()