Commit 03df6078 authored by fuzzle

enddate validation out of html parsing is hellish - correct some edgecases

parent ccc2d285
......@@ -26,7 +26,10 @@ r = http.request("GET", url)
soup = BeautifulSoup(r.data, 'html.parser')
# just some feedback
print str(soup.title.string)
# Feedback only: echo the page title. A title that cannot be read or
# printed must not abort the run, so ordinary errors are ignored here.
try:
    print(str(soup.title.string))
except Exception:
    # Best effort. Catching Exception instead of using a bare except keeps
    # Ctrl-C (KeyboardInterrupt) and SystemExit working.
    pass
#print soup.select_one(".field--name-title").string
print ("#####")
......@@ -74,13 +77,20 @@ try:
except:
xxdate = soup.time.attrs['datetime']
# get endtime, should be second time tag
# Get the end time; it should be the second <time> tag.
# odder is used later as the step when slicing the list of <time> tags:
# it starts at 2 (take every second tag) and drops to 1 when the second
# tag turns out not to be a usable end date for the first one.
odder = 2
try:
    xxdateend = soup.find_all('time')[1].attrs['datetime']
    # If the end is not on the same calendar date as the start, ignore it.
    # Compare the whole "YYYY-MM-DD" prefix (first 10 characters of e.g.
    # 2019-10-15T10...). The single character at index 8 is only the tens
    # digit of the day, so comparing just that would treat the 15th and the
    # 16th (or the same day in different months) as the same day.
    if xxdateend[:10] != xxdate[:10]:
        xxdateend = xxdate
        odder = 1
        print("being carefull with rdate, no enddate given")
except Exception:
    # Best effort: with no second <time> tag or no datetime attribute,
    # nothing is changed here. Catching Exception instead of using a bare
    # except keeps Ctrl-C (KeyboardInterrupt) and SystemExit working.
    pass
# get geojson
# often there is geojson coordinates which can be added as geo in ics like
# GEO:48.85299;2.36885
......@@ -131,11 +141,11 @@ event.add('dtstamp', datetime.strptime(xxdate, "%Y-%m-%dT%H:%M:%SZ"))
# do rdate if given
# TODO : weekly / monthly events with rrule
try:
# get only every second element of list
# get only every second element of list, if odder=2
# and make rfc conform rdate element
# actually not all used - should generate a rdate;value=period:20191212T010101/20101212T010102
for k,i in enumerate(soup.find_all('time')[0::2]):
#actually not used - should generate a rdate;value=period:20191212T010101/20101212T010102
# https://tools.ietf.org/html/rfc5545#section-3.8.5.2
for k,i in enumerate(soup.find_all('time')[0::odder]):
# von = (datetime.strptime((i.attrs['datetime']), "%Y-%m-%dT%H:%M:%SZ")) #) #+"/"+(str(
# bis = (datetime.strftime((soup.find_all('time')[k+1].attrs['datetime']), "%Y-%m-%dT%H:%M:%SZ")) #) #))
# von = i.attrs['datetime']
......@@ -149,7 +159,10 @@ event.add('transp', "TRANSPARENT")
event.add('tzid', "Europe/Berlin")
event.add('CATEGORIES', cat)
event.add('class', "PUBLIC")
event.add('description', xxdesc+"\n"+url)
# The description is the scraped text followed by the source URL. If that
# string cannot be built or added, fall back to the URL alone so the event
# still carries a description.
try:
    event.add('description', xxdesc + "\n" + url)
except Exception:
    # Catching Exception instead of using a bare except keeps Ctrl-C
    # (KeyboardInterrupt) and SystemExit working.
    event.add('description', url)
event.add('location', xxlocation)
# try add GEO Tag
try:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment