edX events decoding

The following script can easily be adapted to decode edX discussion events:

--------------------

import json
import pylab as pl
import csv
import ast
import httpagentparser
import dateutil
from datetime import datetime
import dateutil.parser
from dateutil import tz
from geoip import geolite2

# Decoded events, one dict per log line, accumulated across all log files.
data = []

# Paths of the edX event log files to import (placeholders: fill in real paths).
events = ['…', '…']

def utc_to_local(utc):
    """Convert a UTC timestamp string to a datetime in the local time zone.

    Args:
        utc: Timestamp text in any format ``dateutil.parser.parse`` accepts.
            A value that carries no explicit offset is taken to be UTC.

    Returns:
        A timezone-aware ``datetime`` expressed in the zone returned by
        ``tz.tzlocal()``.
    """
    parsed = dateutil.parser.parse(utc)
    # Only stamp UTC onto naive values; an offset already present in the
    # string is kept so the instant in time is not silently shifted.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=tz.tzutc())
    return parsed.astimezone(tz.tzlocal())

def replace_all(text, dic):
    """Return *text* with every key of *dic* replaced by its value.

    Args:
        text: The string to transform.
        dic: Mapping of substring to replacement string.

    Returns:
        The transformed string. Replacements are applied one after another
        in the mapping's iteration order, so the output of an earlier
        substitution can itself be matched by a later key.
    """
    for old, new in dic.items():
        text = text.replace(old, new)
    return text

# Load every log file into ``data``; each non-blank line is one JSON event.
for log_path in events:
    # The ``with`` block closes the file, so no explicit close is needed.
    with open(log_path) as log_file:
        for line in log_file:
            # Skip blank lines (e.g. a trailing newline), which are not JSON.
            if line.strip():
                data.append(json.loads(line))

# Course id: an event is kept only if this text occurs in its context.
string1 = 'course-v1:courseid'

# Event types to keep (matched as substrings of the event's event_type).
string2 = (
    'edx.forum.thread.created',
    'edx.forum.response.created',
    'edx.forum.comment.created',
)

# Key names searched for in the event's 'context' and 'event' dictionaries.
string3 = 'user_id'
string4 = 'category_name'
string5 = 'id'
string6 = 'path'

# Human-readable labels substituted for the raw event type names.
rep = {
    'edx.forum.thread.created': 'new post',
    'edx.forum.comment.created': 'comment about a reply',
    'edx.forum.response.created': 'reply to a new post',
}

# Write one CSV row for every forum event that belongs to the selected course.
# newline='' leaves line endings to the csv module, as its documentation
# requires; without it an extra '\r' is written per row on Windows.
with open('DiscussionEvents.csv', 'w', newline='') as csvfile:
    # DictWriter raises on keys missing from this list, so it names every key
    # a record may carry. 'time2' is listed but never filled in by this
    # script, so that column stays empty.
    fieldnames = ['agent', 'context', 'path', 'userid', 'event',
                  'category_name', 'discussionid', 'event_type', 'ip', 'time',
                  'time2', 'date', 'username', 'event_source',
                  'accept_language', 'host', 'page', 'referer', 'session',
                  'name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for record in data:
        course = str(record['context'])
        eventtype = str(record['event_type'])

        # Keep only events of the chosen course and of a wanted forum type.
        if string1 not in course:
            continue
        if not any(wanted in eventtype for wanted in string2):
            continue

        # Lift the user id and request path out of the context dictionary.
        # Keys are matched by substring, as in the original script.
        for key, value in record['context'].items():
            if string3 in key:
                record['userid'] = value
            if string6 in key:
                record['path'] = value

        # The payload is normally already a dict; fall back to parsing its
        # text form only when it is not.
        event = record['event']
        if not isinstance(event, dict):
            event = ast.literal_eval(str(event))

        for key, value in event.items():
            if string4 in key:
                record['category_name'] = value
            # Exact match: 'id' is a substring of many other key names.
            if string5 == key:
                record['discussionid'] = value

        record['event_type'] = replace_all(record['event_type'], rep)
        record['agent'] = httpagentparser.simple_detect(record['agent'])
        record['time'] = utc_to_local(record['time'])
        record['date'] = record['time'].date()

        # Replace the IP with its geolocation when the database knows it;
        # otherwise keep the raw address.
        try:
            match = geolite2.lookup(record['ip'])
        except ValueError:
            # NOTE(review): python-geoip is expected to raise ValueError for
            # a malformed or empty address - confirm against its docs.
            match = None
        if match is not None:
            record['ip'] = match.location

        writer.writerow(record)