Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions extruct/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
"""
JSON-LD extractor
"""

import jstyleson
import json
import re

import lxml.etree

from extruct.utils import parse_html

HTML_OR_JS_COMMENTLINE = re.compile(r'^\s*(//.*|<!--.*-->)')


class JsonLdExtractor(object):
Expand All @@ -34,8 +33,7 @@ def _extract_items(self, node):
data = json.loads(script, strict=False)
except ValueError:
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
data = json.loads(
HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
data = jstyleson.loads(script, strict=False)
if isinstance(data, list):
return data
elif isinstance(data, dict):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ mf2py>=1.1.0
six>=1.11
w3lib
html-text
jstyleson