@@ -1524,7 +1524,8 @@ def _do_links(self, text: str) -> str:
15241524 anchor_allowed_pos = 0
15251525
15261526 curr_pos = 0
1527- while True : # Handle the next link.
1527+
1528+ while True :
15281529 # The next '[' is the start of:
15291530 # - an inline anchor: [text](url "title")
15301531 # - a reference anchor: [text][id]
@@ -1552,8 +1553,11 @@ def _do_links(self, text: str) -> str:
15521553 # matching brackets in img alt text -- we'll differ in that
15531554 # regard.
15541555 bracket_depth = 0
1555- for p in range (start_idx + 1 , min (start_idx + MAX_LINK_TEXT_SENTINEL ,
1556- text_length )):
1556+
1557+ for p in range (
1558+ start_idx + 1 ,
1559+ min (start_idx + MAX_LINK_TEXT_SENTINEL , text_length )
1560+ ):
15571561 ch = text [p ]
15581562 if ch == ']' :
15591563 bracket_depth -= 1
@@ -1566,7 +1570,7 @@ def _do_links(self, text: str) -> str:
15661570 # This isn't markup.
15671571 curr_pos = start_idx + 1
15681572 continue
1569- link_text = text [start_idx + 1 : p ]
1573+ link_text = text [start_idx + 1 : p ]
15701574
15711575 # Fix for issue 341 - Injecting XSS into link text
15721576 if self .safe_mode :
@@ -1578,72 +1582,30 @@ def _do_links(self, text: str) -> str:
15781582 normed_id = re .sub (r'\W' , '-' , link_text [1 :])
15791583 if normed_id in self .footnotes :
15801584 self .footnote_ids .append (normed_id )
1581- result = '<sup class="footnote-ref" id="fnref-%s">' \
1582- '<a href="#fn-%s">%s</a></sup>' \
1583- % (normed_id , normed_id , len (self .footnote_ids ))
1585+ result = (
1586+ f'<sup class="footnote-ref" id="fnref-{ normed_id } ">'
1587+ f'<a href="#fn-{ normed_id } ">{ len (self .footnote_ids )} </a></sup>'
1588+ )
15841589 text = text [:start_idx ] + result + text [p + 1 :]
15851590 else :
15861591 # This id isn't defined, leave the markup alone.
1587- curr_pos = p + 1
1592+ curr_pos = p + 1
15881593 continue
15891594
15901595 # Now determine what this is by the remainder.
15911596 p += 1
15921597
1593- # Inline anchor or img?
1594- if text [p :p + 1 ] == '(' : # attempt at perf improvement
1595- url , title , url_end_idx = self ._extract_url_and_title (text , p )
1596- if url is not None :
1597- # Handle an inline anchor or img.
1598- is_img = start_idx > 0 and text [start_idx - 1 ] == "!"
1599- if is_img :
1600- start_idx -= 1
1598+ # -- Extract the URL, title and end index from the link
16011599
1602- # We've got to encode these to avoid conflicting
1603- # with italics/bold.
1604- url = self ._unhash_html_spans (url , code = True ) \
1605- .replace ('*' , self ._escape_table ['*' ]) \
1606- .replace ('_' , self ._escape_table ['_' ])
1607- if title :
1608- title_str = ' title="%s"' % (
1609- _xml_escape_attr (title )
1610- .replace ('*' , self ._escape_table ['*' ])
1611- .replace ('_' , self ._escape_table ['_' ]))
1612- else :
1613- title_str = ''
1614- if is_img :
1615- img_class_str = self ._html_class_str_from_tag ("img" )
1616- result = '<img src="%s" alt="%s"%s%s%s' \
1617- % (self ._protect_url (url ),
1618- self ._hash_span (_xml_escape_attr (link_text )),
1619- title_str ,
1620- img_class_str ,
1621- self .empty_element_suffix )
1622- if "smarty-pants" in self .extras :
1623- result = result .replace ('"' , self ._escape_table ['"' ])
1624- curr_pos = start_idx + len (result )
1625- anchor_allowed_pos = start_idx + len (result )
1626- text = text [:start_idx ] + result + text [url_end_idx :]
1627- elif start_idx >= anchor_allowed_pos :
1628- safe_link = self ._safe_href .match (url )
1629- if self .safe_mode and not safe_link :
1630- result_head = '<a href="#"%s>' % (title_str )
1631- else :
1632- result_head = '<a href="{}"{}>' .format (self ._protect_url (url ), title_str )
1633- result = '{}{}</a>' .format (result_head , link_text )
1634- if "smarty-pants" in self .extras :
1635- result = result .replace ('"' , self ._escape_table ['"' ])
1636- # <img> allowed from curr_pos on, <a> from
1637- # anchor_allowed_pos on.
1638- curr_pos = start_idx + len (result_head )
1639- anchor_allowed_pos = start_idx + len (result )
1640- text = text [:start_idx ] + result + text [url_end_idx :]
1641- else :
1642- # Anchor not allowed here.
1643- curr_pos = start_idx + 1
1600+ # inline anchor or inline img
1601+ if text [p :p + 1 ] == '(' :
1602+ url , title , url_end_idx = self ._extract_url_and_title (text , p )
1603+ if url is None :
1604+ # text isn't markup
1605+ curr_pos = start_idx + 1
16441606 continue
1645-
1646- # Reference anchor or img?
1607+ url = self . _unhash_html_spans ( url , code = True )
1608+ # reference anchor or reference img
16471609 else :
16481610 match = None
16491611 if 'link-shortrefs' in self .extras :
@@ -1662,64 +1624,71 @@ def _do_links(self, text: str) -> str:
16621624 match = None
16631625
16641626 match = match or self ._tail_of_reference_link_re .match (text , p )
1665- if match :
1666- # Handle a reference-style anchor or img.
1667- is_img = start_idx > 0 and text [start_idx - 1 ] == "!"
1668- if is_img :
1669- start_idx -= 1
1670- link_id = match .group ("id" ).lower ()
1671- if not link_id :
1672- link_id = link_text .lower () # for links like [this][]
1673- if link_id in self .urls :
1674- url = self .urls [link_id ]
1675- # We've got to encode these to avoid conflicting
1676- # with italics/bold.
1677- url = url .replace ('*' , self ._escape_table ['*' ]) \
1678- .replace ('_' , self ._escape_table ['_' ])
1679- title = self .titles .get (link_id )
1680- if title :
1681- title = _xml_escape_attr (title ) \
1682- .replace ('*' , self ._escape_table ['*' ]) \
1683- .replace ('_' , self ._escape_table ['_' ])
1684- title_str = ' title="%s"' % title
1685- else :
1686- title_str = ''
1687- if is_img :
1688- img_class_str = self ._html_class_str_from_tag ("img" )
1689- result = '<img src="%s" alt="%s"%s%s%s' \
1690- % (self ._protect_url (url ),
1691- self ._hash_span (_xml_escape_attr (link_text )),
1692- title_str ,
1693- img_class_str ,
1694- self .empty_element_suffix )
1695- if "smarty-pants" in self .extras :
1696- result = result .replace ('"' , self ._escape_table ['"' ])
1697- curr_pos = start_idx + len (result )
1698- text = text [:start_idx ] + result + text [match .end ():]
1699- elif start_idx >= anchor_allowed_pos :
1700- if self .safe_mode and not self ._safe_href .match (url ):
1701- result_head = '<a href="#"%s>' % (title_str )
1702- else :
1703- result_head = '<a href="{}"{}>' .format (self ._protect_url (url ), title_str )
1704- result = '{}{}</a>' .format (result_head , link_text )
1705- if "smarty-pants" in self .extras :
1706- result = result .replace ('"' , self ._escape_table ['"' ])
1707- # <img> allowed from curr_pos on, <a> from
1708- # anchor_allowed_pos on.
1709- curr_pos = start_idx + len (result_head )
1710- anchor_allowed_pos = start_idx + len (result )
1711- text = text [:start_idx ] + result + text [match .end ():]
1712- else :
1713- # Anchor not allowed here.
1714- curr_pos = start_idx + 1
1715- else :
1716- # This id isn't defined, leave the markup alone.
1717- # set current pos to end of link title and continue from there
1718- curr_pos = p
1627+ if not match :
1628+ # text isn't markup
1629+ curr_pos = start_idx + 1
17191630 continue
17201631
1721- # Otherwise, it isn't markup.
1722- curr_pos = start_idx + 1
1632+ link_id = match .group ("id" ).lower () or link_text .lower () # for links like [this][]
1633+
1634+ if link_id not in self .urls :
1635+ # This id isn't defined, leave the markup alone.
1636+ # set current pos to end of link title and continue from there
1637+ curr_pos = p
1638+ continue
1639+
1640+ url = self .urls [link_id ]
1641+ title = self .titles .get (link_id )
1642+ url_end_idx = match .end ()
1643+
1644+ # -- Encode and hash the URL and title to avoid conflicts with italics/bold
1645+
1646+ url = (
1647+ url
1648+ .replace ('*' , self ._escape_table ['*' ])
1649+ .replace ('_' , self ._escape_table ['_' ])
1650+ )
1651+ if title :
1652+ title = (
1653+ _xml_escape_attr (title )
1654+ .replace ('*' , self ._escape_table ['*' ])
1655+ .replace ('_' , self ._escape_table ['_' ])
1656+ )
1657+ title_str = f' title="{ title } "'
1658+ else :
1659+ title_str = ''
1660+
1661+ # -- Process the anchor/image
1662+
1663+ is_img = start_idx > 0 and text [start_idx - 1 ] == "!"
1664+ if is_img :
1665+ start_idx -= 1
1666+ img_class_str = self ._html_class_str_from_tag ("img" )
1667+ result = result_head = (
1668+ f'<img src="{ self ._protect_url (url )} "'
1669+ f' alt="{ self ._hash_span (_xml_escape_attr (link_text ))} "'
1670+ f'{ title_str } { img_class_str } { self .empty_element_suffix } '
1671+ )
1672+ elif start_idx >= anchor_allowed_pos :
1673+ if self .safe_mode and not self ._safe_href .match (url ):
1674+ result_head = f'<a href="#"{ title_str } >'
1675+ else :
1676+ result_head = f'<a href="{ self ._protect_url (url )} "{ title_str } >'
1677+ result = f'{ result_head } { link_text } </a>'
1678+ else :
1679+ # anchor not allowed here/invalid markup
1680+ curr_pos = start_idx + 1
1681+ continue
1682+
1683+ if "smarty-pants" in self .extras :
1684+ result = result .replace ('"' , self ._escape_table ['"' ])
1685+
1686+ # <img> allowed from curr_pos onwards, <a> allowed from anchor_allowed_pos onwards.
1687+ # this means images can exist within `<a>` tags but anchors can only come after the
1688+ # current anchor has been closed
1689+ curr_pos = start_idx + len (result_head )
1690+ anchor_allowed_pos = start_idx + len (result )
1691+ text = text [:start_idx ] + result + text [url_end_idx :]
17231692
17241693 return text
17251694
0 commit comments