feat: improve web_fetch readability extraction

This commit is contained in:
Peter Steinberger
2026-01-16 23:17:55 +00:00
parent 9aad6dfe1b
commit 37fa4f7eef
9 changed files with 242 additions and 8 deletions

120
pnpm-lock.yaml generated
View File

@@ -40,6 +40,9 @@ importers:
'@mariozechner/pi-tui':
specifier: ^0.46.0
version: 0.46.0
'@mozilla/readability':
specifier: ^0.6.0
version: 0.6.0
'@sinclair/typebox':
specifier: 0.34.47
version: 0.34.47
@@ -100,6 +103,9 @@ importers:
json5:
specifier: ^2.2.3
version: 2.2.3
linkedom:
specifier: ^0.18.12
version: 0.18.12
long:
specifier: 5.3.2
version: 5.3.2
@@ -1036,6 +1042,10 @@ packages:
'@mistralai/mistralai@1.10.0':
resolution: {integrity: sha512-tdIgWs4Le8vpvPiUEWne6tK0qbVc+jMenujnvTqOjogrJUsCSQhus0tHTU1avDDh5//Rq2dFgP9mWRAdIEoBqg==}
'@mozilla/readability@0.6.0':
resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==}
engines: {node: '>=14.0.0'}
'@napi-rs/wasm-runtime@1.1.1':
resolution: {integrity: sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A==}
@@ -2221,6 +2231,9 @@ packages:
resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
engines: {node: '>=18'}
boolbase@1.0.0:
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
bottleneck@2.19.5:
resolution: {integrity: sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==}
@@ -2405,6 +2418,16 @@ packages:
resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
engines: {node: '>= 8'}
css-select@5.2.2:
resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==}
css-what@6.2.2:
resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==}
engines: {node: '>= 6'}
cssom@0.5.0:
resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==}
curve25519-js@0.0.4:
resolution: {integrity: sha512-axn2UMEnkhyDUPWOwVKBMVIzSQy2ejH2xRGy1wq81dqRwApXfIzfbE3hIX0ZRFBIihf/KDqK158DLwESu4AK1w==}
@@ -2456,9 +2479,22 @@ packages:
docx-preview@0.3.7:
resolution: {integrity: sha512-Lav69CTA/IYZPJTsKH7oYeoZjyg96N0wEJMNslGJnZJ+dMUZK85Lt5ASC79yUlD48ecWjuv+rkcmFt6EVPV0Xg==}
dom-serializer@2.0.0:
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
domelementtype@2.3.0:
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
domhandler@5.0.3:
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
engines: {node: '>= 4'}
dompurify@3.3.1:
resolution: {integrity: sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q==}
domutils@3.2.2:
resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==}
dotenv@17.2.3:
resolution: {integrity: sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==}
engines: {node: '>=12'}
@@ -2493,6 +2529,10 @@ packages:
resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
engines: {node: '>=0.12'}
entities@6.0.1:
resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==}
engines: {node: '>=0.12'}
env-var@7.5.0:
resolution: {integrity: sha512-mKZOzLRN0ETzau2W2QXefbFjo5EF4yWq28OyKb9ICdeNhHJlOE/pHHnz4hdYJ9cNZXcJHo5xN4OT4pzuSHSNvA==}
engines: {node: '>=10'}
@@ -2767,9 +2807,15 @@ packages:
html-escaper@2.0.2:
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
html-escaper@3.0.3:
resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==}
html-parse-string@0.0.9:
resolution: {integrity: sha512-wyGnsOolHbNrcb8N6bdJF4EHyzd3zVGCb9/mBxeNjAYBDOZqD7YkqLBz7kXtdgHwNnV8lN/BpSDpsI1zm8Sd8g==}
htmlparser2@10.0.0:
resolution: {integrity: sha512-TwAZM+zE5Tq3lrEHvOlvwgj1XLWQCtaaibSN11Q+gGBAS7Y1uZSWwXXRe4iF6OXnaq1riyQAPFOBtYc77Mxq0g==}
http-errors@2.0.1:
resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
engines: {node: '>= 0.8'}
@@ -3037,6 +3083,15 @@ packages:
limiter@1.1.5:
resolution: {integrity: sha512-FWWMIEOxz3GwUI4Ts/IvgVy6LPvoMPgjMdQ185nN6psJyBJ4yOpzqm695/h5umdLJg2vW3GR5iG11MAkR2AzJA==}
linkedom@0.18.12:
resolution: {integrity: sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==}
engines: {node: '>=16'}
peerDependencies:
canvas: '>= 2'
peerDependenciesMeta:
canvas:
optional: true
linkify-it@5.0.0:
resolution: {integrity: sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==}
@@ -3307,6 +3362,9 @@ packages:
engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0}
deprecated: This package is no longer supported.
nth-check@2.1.1:
resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
object-assign@4.1.1:
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
engines: {node: '>=0.10.0'}
@@ -3982,6 +4040,9 @@ packages:
uhtml@5.0.9:
resolution: {integrity: sha512-qPyu3vGilaLe6zrjOCD/xezWEHLwdevxmbY3hzyhT25KBDF4F7YYW3YZcL3kylD/6dMoVISHjn8ggV3+9FY+5g==}
uhyphen@0.2.0:
resolution: {integrity: sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==}
uint8array-extras@1.5.0:
resolution: {integrity: sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==}
engines: {node: '>=18'}
@@ -5253,6 +5314,8 @@ snapshots:
zod: 3.25.76
zod-to-json-schema: 3.25.1(zod@3.25.76)
'@mozilla/readability@0.6.0': {}
'@napi-rs/wasm-runtime@1.1.1':
dependencies:
'@emnapi/core': 1.8.1
@@ -6544,6 +6607,8 @@ snapshots:
transitivePeerDependencies:
- supports-color
boolbase@1.0.0: {}
bottleneck@2.19.5: {}
bowser@2.13.1: {}
@@ -6745,6 +6810,18 @@ snapshots:
shebang-command: 2.0.0
which: 2.0.2
css-select@5.2.2:
dependencies:
boolbase: 1.0.0
css-what: 6.2.2
domhandler: 5.0.3
domutils: 3.2.2
nth-check: 2.1.1
css-what@6.2.2: {}
cssom@0.5.0: {}
curve25519-js@0.0.4: {}
data-uri-to-buffer@4.0.1: {}
@@ -6777,10 +6854,28 @@ snapshots:
dependencies:
jszip: 3.10.1
dom-serializer@2.0.0:
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
entities: 4.5.0
domelementtype@2.3.0: {}
domhandler@5.0.3:
dependencies:
domelementtype: 2.3.0
dompurify@3.3.1:
optionalDependencies:
'@types/trusted-types': 2.0.7
domutils@3.2.2:
dependencies:
dom-serializer: 2.0.0
domelementtype: 2.3.0
domhandler: 5.0.3
dotenv@17.2.3: {}
dunder-proto@1.0.1:
@@ -6808,6 +6903,8 @@ snapshots:
entities@4.5.0: {}
entities@6.0.1: {}
env-var@7.5.0:
optional: true
@@ -7157,8 +7254,17 @@ snapshots:
html-escaper@2.0.2: {}
html-escaper@3.0.3: {}
html-parse-string@0.0.9: {}
htmlparser2@10.0.0:
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
domutils: 3.2.2
entities: 6.0.1
http-errors@2.0.1:
dependencies:
depd: 2.0.0
@@ -7436,6 +7542,14 @@ snapshots:
limiter@1.1.5: {}
linkedom@0.18.12:
dependencies:
css-select: 5.2.2
cssom: 0.5.0
html-escaper: 3.0.3
htmlparser2: 10.0.0
uhyphen: 0.2.0
linkify-it@5.0.0:
dependencies:
uc.micro: 2.1.0
@@ -7741,6 +7855,10 @@ snapshots:
set-blocking: 2.0.0
optional: true
nth-check@2.1.1:
dependencies:
boolbase: 1.0.0
object-assign@4.1.1: {}
object-inspect@1.13.4: {}
@@ -8537,6 +8655,8 @@ snapshots:
dependencies:
'@webreflection/alien-signals': 0.3.2
uhyphen@0.2.0: {}
uint8array-extras@1.5.0: {}
undici-types@7.16.0: {}