Example of handling a simple data file with the standard library only

This commit is contained in:
Leonora Tindall 2025-01-19 13:07:03 -06:00
parent 08f844da88
commit 079977df2d
1 changed files with 73 additions and 0 deletions

73
simple.py Normal file
View File

@ -0,0 +1,73 @@
import sys as trans
import csv
import pprint
def _load_attendance(filename):
    """Parse the attendance CSV at *filename* into two lookup tables.

    The file is expected to have the columns ``Participant ID``,
    ``Participant Name``, ``Event Name`` and ``Event Type``; every row
    records one participant attending one event.  Participant IDs and event
    names are assumed to be unique.

    Returns a ``(participants, events)`` tuple:

    * ``participants`` maps each participant ID to
      ``{'name': <participant name>, 'events': [<event name>, ...]}``
    * ``events`` maps each event name to
      ``{'type': <event type>, 'participants': [<participant name>, ...]}``

    Raises ``OSError`` if the file cannot be opened, and ``KeyError`` on the
    first data row if one of the required columns is missing.
    """
    participants = {}
    events = {}

    # the docs say we must use newline='' when handing a file to the csv module
    with open(filename, newline='') as file:
        # deduce the format of the CSV file by looking at the first
        # kilobyte of data
        try:
            dialect = csv.Sniffer().sniff(file.read(1024))
        except csv.Error:
            # the sniffer gives up on empty or ambiguous samples; fall back
            # to the default comma-separated dialect instead of crashing
            dialect = csv.excel
        # rewind the file to its start so the reader sees the header row
        file.seek(0)

        reader = csv.DictReader(file, dialect=dialect)
        for row in reader:
            # grab all the data from the row up front. this keeps the code
            # below less verbose and fails fast (KeyError) when a column is
            # missing from the header.
            # NOTE: a row that is merely too short does not fail here;
            # DictReader fills its missing fields with None.
            participant_id = row['Participant ID']
            participant_name = row['Participant Name']
            event_name = row['Event Name']
            event_type = row['Event Type']

            # if this participant has not yet been seen, record them
            if participant_id not in participants:
                participants[participant_id] = {
                    'name': participant_name,
                    'events': [],
                }

            # if this event has not yet been seen, record it
            # note that we trust the event type from the first occurrence
            if event_name not in events:
                events[event_name] = {
                    'type': event_type,
                    'participants': [],
                }

            # record this attendance on both the participant and the event
            participants[participant_id]['events'].append(event_name)
            events[event_name]['participants'].append(participant_name)

    return participants, events


def _print_report(participants, events):
    """Print both lookup tables, then per-participant and per-event counts."""
    # print all the data
    pprint.pp(participants)
    pprint.pp(events)
    print()

    # print attendee stats
    for pid, participant in participants.items():
        num_events = len(participant['events'])
        print(f"{participant['name']} ({pid}) attended {num_events} events")
    print()

    # print attendance stats
    for event_name, event in events.items():
        num_participants = len(event['participants'])
        print(f"{event_name} had {num_participants} attendees")


def main():
    """Read the CSV named on the command line and print attendance stats.

    Expects exactly one command-line argument, the path of the CSV file to
    process.  With any other number of arguments a usage message is written
    to stderr and the process exits with status 1.
    """
    if len(trans.argv) != 2:
        # diagnostics go to stderr so stdout only ever carries the report
        print("Requires exactly one argument: the CSV to process.",
              file=trans.stderr)
        trans.exit(1)

    participants, events = _load_attendance(trans.argv[1])
    _print_report(participants, events)
# run the report only when this file is executed as a script, so that
# importing it as a module has no side effects
if __name__ == "__main__":
    main()