"""Example of handling a simple data file with the standard library only.

Usage: python simple.py ATTENDANCE.csv

The CSV must start with a header row that contains the columns
"Participant ID", "Participant Name", "Event Name" and "Event Type".
Every following row records one participant attending one event.
"""

# NOTE(review): ``trans`` is an unusual alias for ``sys``; it is kept so the
# module-level names stay exactly what this file originally defined.
import sys as trans
import csv
import pprint

# columns that every input file has to provide
_REQUIRED_COLUMNS = (
    'Participant ID',
    'Participant Name',
    'Event Name',
    'Event Type',
)


def _load_attendance(filename):
    """Parse the attendance CSV at *filename*.

    Returns a ``(participants, events)`` tuple of dicts:

    * ``participants`` maps participant ID to
      ``{'name': <participant name>, 'events': [<event name>, ...]}``
    * ``events`` maps event name to
      ``{'type': <event type>, 'participants': [<participant name>, ...]}``

    We assume that participant IDs and event names are unique.

    Raises ``ValueError`` if the header lacks a required column or a row
    is missing a required value, and ``OSError`` if the file cannot be
    opened.
    """
    participants = {}
    events = {}

    # The csv docs say file objects handed to a reader must be opened with
    # newline=''. 'utf-8-sig' reads plain UTF-8 and additionally drops a
    # leading byte-order mark, which would otherwise end up inside the
    # first column name and break the header lookup.
    with open(filename, newline='', encoding='utf-8-sig') as file:
        # deduce the format of the CSV file by looking at the first
        # kilobyte of data, then rewind the file to its start
        sample = file.read(1024)
        file.seek(0)
        try:
            dialect = csv.Sniffer().sniff(sample)
        except csv.Error:
            # the sniffer gives up on empty or irregular samples; fall back
            # to the default comma-separated dialect instead of crashing
            dialect = csv.excel
        reader = csv.DictReader(file, dialect=dialect)

        # "fail fast": check the header once, before touching any row
        # (fieldnames is None when the file has no header row at all)
        present = reader.fieldnames or []
        missing = [col for col in _REQUIRED_COLUMNS if col not in present]
        if missing:
            raise ValueError(
                f"missing required column(s): {', '.join(missing)}"
            )

        for row in reader:
            # grab all the data from the row. this makes the code below
            # less verbose and also ensures all data is present;
            # DictReader fills the columns of a short row with None
            values = [row[col] for col in _REQUIRED_COLUMNS]
            if any(value is None for value in values):
                raise ValueError(
                    f"line {reader.line_num}: row is missing required values"
                )
            participant_id, participant_name, event_name, event_type = values

            # if this participant has not yet been seen, record them
            participant = participants.setdefault(
                participant_id,
                {'name': participant_name, 'events': []},
            )

            # if this event has not yet been seen, record it
            # note that we trust the event type from the first occurrence
            event = events.setdefault(
                event_name,
                {'type': event_type, 'participants': []},
            )

            # record this attendance on both the participant and event
            participant['events'].append(event_name)
            event['participants'].append(participant_name)

    return participants, events


def _print_report(participants, events):
    """Print the parsed data followed by per-attendee and per-event counts."""
    # print all the data
    pprint.pp(participants)
    pprint.pp(events)

    print()

    # print attendee stats
    for pid, participant in participants.items():
        name = participant['name']
        num_events = len(participant['events'])
        print(f"{name} ({pid}) attended {num_events} events")

    print()

    # print attendance stats
    for event_name, event in events.items():
        num_participants = len(event['participants'])
        print(f"{event_name} had {num_participants} attendees")


def main():
    """Command-line entry point: summarise the CSV named on the command line.

    Expects exactly one argument, the path of the CSV to process. On a
    wrong argument count, an unreadable file or malformed data, a message
    is written to stderr and the process exits with status 1.
    """
    if len(trans.argv) != 2:
        print("Requires exactly one argument: the CSV to process.",
              file=trans.stderr)
        trans.exit(1)

    filename = trans.argv[1]

    try:
        participants, events = _load_attendance(filename)
    except (OSError, ValueError) as err:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 input
        print(f"Could not process {filename}: {err}", file=trans.stderr)
        trans.exit(1)

    _print_report(participants, events)


if __name__ == "__main__":
    main()