> For clean Markdown of any page, append .md to the page URL.
> For a complete documentation index, see https://docs.usescout.sh/llms.txt.
> For full documentation content, see https://docs.usescout.sh/llms-full.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.usescout.sh/_mcp/server.

# Extract

POST https://core.usescout.sh/v1/extract
Content-Type: application/json

Extract clean content from 1-20 URLs.

Default mode returns per-URL `excerpts` (paragraph sample). Supplying
`objective` and/or `search_queries` focuses excerpts via an LLM call.
Toggle `advanced_settings.full_content` to also return full markdown.
Provide `advanced_settings.summary` to also run a summarization pass.

Reference: https://docs.usescout.sh/api-reference/scout/extract/post-v-1-extract-post

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: Scout
  version: 1.0.0
paths:
  /v1/extract:
    post:
      operationId: post-v-1-extract-post
      summary: Extract
      description: |-
        Extract clean content from 1-20 URLs.

        Default mode returns per-URL `excerpts` (paragraph sample). Supplying
        `objective` and/or `search_queries` focuses excerpts via an LLM call.
        Toggle `advanced_settings.full_content` to also return full markdown.
        Provide `advanced_settings.summary` to also run a summarization pass.
      tags:
        - subpackage_extract
      parameters:
        - name: Authorization
          in: header
          description: Your API key, sent as a Bearer token.
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ExtractRequestV1'
servers:
  - url: https://core.usescout.sh
components:
  schemas:
    FetchPolicy:
      type: object
      properties:
        max_age_seconds:
          type: integer
          default: 86400
          description: >-
            Maximum cache age (in seconds) before a live fetch is preferred.
            Minimum 600.
        timeout_seconds:
          type: integer
          default: 30
          description: Hard timeout for a single live fetch.
        disable_cache_fallback:
          type: boolean
          default: false
          description: >-
            If true, a live-fetch failure errors out instead of returning a
            stale cached page.
      title: FetchPolicy
    ExcerptSettings:
      type: object
      properties:
        max_chars_per_result:
          type: integer
          default: 2000
          description: >-
            Per-URL cap on excerpt characters (values below 1000 are coerced to
            1000).
      title: ExcerptSettings
    SummarySettings:
      type: object
      properties:
        query:
          type:
            - string
            - 'null'
          description: Summarization prompt. If omitted, no summary is run.
        schema:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: >-
            Optional JSON Schema (bare). When set, the summary is a structured
            object matching this shape; otherwise a plain string.
        response_format:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: >-
            OpenAI-compatible response_format. Accepts either
            `{type:'json_schema', json_schema:{name, schema, strict?,
            description?}}` (full OpenAI shape) or `{name, schema, strict?,
            description?}` (just the inner json_schema part). When set,
            overrides `json_schema`/`schema`.
      title: SummarySettings
    AdvancedSettings:
      type: object
      properties:
        fetch_policy:
          $ref: '#/components/schemas/FetchPolicy'
        excerpt_settings:
          $ref: '#/components/schemas/ExcerptSettings'
        full_content:
          description: >-
            Bool or { 'max_chars_per_result': N } - when truthy, include the
            page's full markdown on the result.
        summary:
          oneOf:
            - $ref: '#/components/schemas/SummarySettings'
            - type: 'null'
          description: >-
            When set, run a summarization call with the given query (and
            optional JSON schema).
        subpages:
          type:
            - integer
            - 'null'
          description: >-
            Fetch up to N same-host internal links and include their excerpts on
            the parent result. When unset (null) and `objective` is provided,
            defaults to 5 with objective-aware candidate discovery and LLM
            ranking. Pass 0 explicitly to disable subpage fan-out.
      title: AdvancedSettings
    Extras:
      type: object
      properties:
        links:
          type: integer
          default: 0
          description: Up to N outbound links to include on each result.
        images:
          type: integer
          default: 0
          description: Up to N images to include on each result.
      title: Extras
    ExtractRequestV1:
      type: object
      properties:
        urls:
          type: array
          items:
            type: string
          description: Public http(s) URLs to extract (1-20).
        objective:
          type:
            - string
            - 'null'
          description: What you're looking for. Focuses the per-URL excerpts.
        search_queries:
          type:
            - array
            - 'null'
          items:
            type: string
          description: >-
            Keyword hints. Combined with `objective` to focus the extracted
            excerpts.
        max_chars_total:
          type:
            - integer
            - 'null'
          description: Cap on excerpt characters across all URLs combined.
        session_id:
          type:
            - string
            - 'null'
          description: >-
            Optional caller-supplied session id. Echoed back; if absent a new
            one is generated.
        client_model:
          type:
            - string
            - 'null'
          description: Caller's model identifier. Echoed only; not used.
        advanced_settings:
          $ref: '#/components/schemas/AdvancedSettings'
          description: >-
            Per-call advanced tuning - fetch policy, excerpt settings,
            full_content, summary and subpages.
        extras:
          $ref: '#/components/schemas/Extras'
          description: 'Side data: include up to N outbound links and/or images per result.'
        max_chars:
          type:
            - integer
            - 'null'
          description: >-
            DEPRECATED alias of
            `advanced_settings.excerpt_settings.max_chars_per_result`.
        output_schema:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: DEPRECATED alias of `advanced_settings.summary.schema`.
      required:
        - urls
      title: ExtractRequestV1
    ExtractSubpage:
      type: object
      properties:
        url:
          type: string
          description: The subpage URL that was extracted.
        title:
          type: string
          default: ''
          description: The subpage title.
        excerpts:
          type: array
          items:
            type: string
          description: Excerpts from the subpage.
        relevance_score:
          type:
            - number
            - 'null'
          format: double
          description: >-
            0..1 score from the objective-aware ranker (null when the ranker did
            not run for this candidate).
        discovered_via:
          type:
            - string
            - 'null'
          description: 'How the subpage was discovered: "html", "slug_guess" or "sitemap".'
      required:
        - url
      title: ExtractSubpage
    ExtractExtras:
      type: object
      properties:
        links:
          type: array
          items:
            type: string
          description: Outbound link URLs from the page (capped to extras.links).
        images:
          type: array
          items:
            type: string
          description: Image URLs from the page (capped to extras.images).
      title: ExtractExtras
    ExtractResult:
      type: object
      properties:
        url:
          type: string
          description: The URL that was extracted.
        title:
          type: string
          default: ''
          description: The page title.
        publish_date:
          type:
            - string
            - 'null'
          description: Publication date (YYYY-MM-DD), parsed from page meta.
        author:
          type:
            - string
            - 'null'
          description: Author, parsed from page meta or JSON-LD.
        image:
          type:
            - string
            - 'null'
          description: og:image URL, when present.
        favicon:
          type:
            - string
            - 'null'
          description: Page favicon URL (absolute).
        excerpts:
          type: array
          items:
            type: string
          description: >-
            Focused excerpts (driven by `objective` / `search_queries` when
            supplied; otherwise default paragraph sample).
        full_content:
          type:
            - string
            - 'null'
          description: >-
            Full page markdown - only when `advanced_settings.full_content` is
            truthy.
        summary:
          description: >-
            Summary - string if no schema was supplied, structured object if
            `summary.schema` was supplied; null if `summary` was not requested.
        subpages:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/ExtractSubpage'
          description: Per-subpage results when `advanced_settings.subpages > 0`.
        extras:
          oneOf:
            - $ref: '#/components/schemas/ExtractExtras'
            - type: 'null'
          description: Extra page side-data (links / images), when requested.
        content:
          type: string
          default: ''
          description: >-
            DEPRECATED alias - copies `full_content` (or joined `excerpts` when
            full_content was not requested).
        data:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: DEPRECATED alias of `summary` when `summary` is an object.
        error:
          type:
            - string
            - 'null'
          description: >-
            DEPRECATED - per-URL failures now live in the top-level `errors[]`;
            this stays for old clients.
      required:
        - url
      title: ExtractResult
    ExtractError:
      type: object
      properties:
        url:
          type: string
          description: The URL that failed.
        error_type:
          type: string
          description: >-
            One of: fetch_error, parse_error, timeout, blocked,
            validation_error.
        http_status_code:
          type:
            - integer
            - 'null'
          description: Upstream HTTP status when known.
        detail:
          type: string
          default: ''
          description: Human-readable failure detail.
      required:
        - url
        - error_type
      title: ExtractError
    ExtractStatusStatus:
      type: string
      enum:
        - success
        - error
      description: success or error.
      title: ExtractStatusStatus
    ExtractStatusSource:
      type: string
      enum:
        - cached
        - live
      default: live
      description: Whether the page came from cache or a live fetch.
      title: ExtractStatusSource
    ExtractStatus:
      type: object
      properties:
        url:
          type: string
          description: The URL.
        status:
          $ref: '#/components/schemas/ExtractStatusStatus'
          description: success or error.
        source:
          $ref: '#/components/schemas/ExtractStatusSource'
          description: Whether the page came from cache or a live fetch.
      required:
        - url
        - status
      title: ExtractStatus
    SearchUsageItem:
      type: object
      properties:
        name:
          type: string
          description: SKU name, e.g. `sku_search`.
        count:
          type: integer
          description: Units of this SKU consumed.
      required:
        - name
        - count
      title: SearchUsageItem
    ExtractResponse:
      type: object
      properties:
        extract_id:
          type: string
          description: Opaque id for this extract call.
        session_id:
          type: string
          description: Caller-supplied or generated.
        results:
          type: array
          items:
            $ref: '#/components/schemas/ExtractResult'
          description: One result per successfully extracted URL.
        errors:
          type: array
          items:
            $ref: '#/components/schemas/ExtractError'
          description: Per-URL failures (not present in `results`).
        statuses:
          type: array
          items:
            $ref: '#/components/schemas/ExtractStatus'
          description: Per-URL status row (success or error; cached or live).
        warnings:
          type:
            - array
            - 'null'
          items:
            type: string
          description: Reserved for future use.
        credits:
          type: integer
          description: >-
            Cost - 2 credits per URL successfully extracted, +1 per URL that hit
            the LLM (focused excerpts or summary).
        subpages_discovered:
          type: integer
          default: 0
          description: >-
            Total subpage candidates surfaced across all parent URLs before
            objective-aware ranking trimmed the list.
        subpages_extracted:
          type: integer
          default: 0
          description: Total subpages actually fetched and returned across all parent URLs.
        usage:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/SearchUsageItem'
          description: >-
            Parallel-style usage line items broken down by SKU (currently
            surfaces `sku_subpage_rank` when the objective-aware subpage ranker
            ran).
        scratchpad:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: >-
            Per-request scratchpad payload when SCRATCHPAD_FIRST=true and an
            objective was provided. Carries the single tool-use Claude answer
            plus retrieval stats.
      required:
        - extract_id
        - session_id
        - results
        - credits
      title: ExtractResponse
    ValidationErrorLocItems:
      oneOf:
        - type: string
        - type: integer
      title: ValidationErrorLocItems
    ValidationErrorCtx:
      type: object
      properties: {}
      title: ValidationErrorCtx
    ValidationError:
      type: object
      properties:
        loc:
          type: array
          items:
            $ref: '#/components/schemas/ValidationErrorLocItems'
        msg:
          type: string
        type:
          type: string
        input:
          description: Any type
        ctx:
          $ref: '#/components/schemas/ValidationErrorCtx'
      required:
        - loc
        - msg
        - type
      title: ValidationError
    HTTPValidationError:
      type: object
      properties:
        detail:
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
      title: HTTPValidationError
  securitySchemes:
    apiKey:
      type: http
      scheme: bearer
      description: Your API key, sent as a Bearer token.

```

## SDK Code Examples

```python Extract_postV1ExtractPost_example
import requests

# Endpoint of the Extract API.
url = "https://core.usescout.sh/v1/extract"

# Request body: the URLs to extract plus a (deprecated-alias) output schema.
payload = {
    "urls": ["https://www.anthropic.com/company"],
    "output_schema": {
        "type": "object",
        "properties": {}
    }
}
# The API key is sent as a Bearer token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so a stalled connection would block
# forever; bound the wait explicitly (adjust the value to your needs).
response = requests.post(url, json=payload, headers=headers, timeout=60)

print(response.json())
```

```javascript Extract_postV1ExtractPost_example
// Endpoint of the Extract API.
const url = 'https://core.usescout.sh/v1/extract';

// Request body, kept as an object and serialized below so it cannot drift
// into invalid hand-written JSON.
const payload = {
  urls: ['https://www.anthropic.com/company'],
  output_schema: {type: 'object', properties: {}}
};

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

try {
  const response = await fetch(url, options);
  const data = await response.json();
  if (response.ok) {
    console.log(data);
  } else {
    // fetch() only rejects on network failures; HTTP errors (e.g. 422
    // validation errors) resolve normally and must be checked explicitly.
    console.error(`Request failed with status ${response.status}`, data);
  }
} catch (error) {
  console.error(error);
}
```

```go Extract_postV1ExtractPost_example
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
)

// main runs the example request and exits with a non-zero status if any step
// fails.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run POSTs the example payload to the Extract endpoint and prints the
// response and its body. It returns the first error encountered instead of
// discarding it, so a failed request can no longer cause a nil dereference.
func run() error {
	url := "https://core.usescout.sh/v1/extract"

	payload := strings.NewReader("{\n  \"urls\": [\n    \"https://www.anthropic.com/company\"\n  ],\n  \"output_schema\": {\n    \"type\": \"object\",\n    \"properties\": {}\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be touched here.
		return fmt.Errorf("sending request: %w", err)
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading response body: %w", err)
	}

	fmt.Println(res)
	fmt.Println(string(body))

	return nil
}
```

```ruby Extract_postV1ExtractPost_example
require 'uri'
require 'net/http'

# Endpoint of the Extract API.
endpoint = URI("https://core.usescout.sh/v1/extract")

# HTTPS connection to the API host.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the Bearer token and the JSON payload.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"urls\": [\n    \"https://www.anthropic.com/company\"\n  ],\n  \"output_schema\": {\n    \"type\": \"object\",\n    \"properties\": {}\n  }\n}"

# Send the request and print the raw response body.
puts client.request(post).read_body
```

```java Extract_postV1ExtractPost_example
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Endpoint of the Extract API.
String endpoint = "https://core.usescout.sh/v1/extract";

// JSON request body: the URLs to extract plus an output schema.
String requestBody = "{\n  \"urls\": [\n    \"https://www.anthropic.com/company\"\n  ],\n  \"output_schema\": {\n    \"type\": \"object\",\n    \"properties\": {}\n  }\n}";

// POST the body with the Bearer token and read the response as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();
```

```php Extract_postV1ExtractPost_example
<?php
require_once('vendor/autoload.php');

// Raw JSON request body: the URLs to extract plus an output schema.
$payload = '{
  "urls": [
    "https://www.anthropic.com/company"
  ],
  "output_schema": {
    "type": "object",
    "properties": {}
  }
}';

// Guzzle request options: the body above and the auth/content-type headers.
$requestOptions = [
  'body' => $payload,
  'headers' => [
    'Authorization' => 'Bearer <token>',
    'Content-Type' => 'application/json',
  ],
];

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://core.usescout.sh/v1/extract', $requestOptions);

// Print the response body.
echo $response->getBody();
```

```csharp Extract_postV1ExtractPost_example
using RestSharp;

// JSON request body: the URLs to extract plus an output schema.
const string jsonBody = "{\n  \"urls\": [\n    \"https://www.anthropic.com/company\"\n  ],\n  \"output_schema\": {\n    \"type\": \"object\",\n    \"properties\": {}\n  }\n}";

// Client bound to the Extract endpoint.
var restClient = new RestClient("https://core.usescout.sh/v1/extract");

// POST request carrying the Bearer token and the JSON body.
var extractRequest = new RestRequest(Method.POST);
extractRequest.AddHeader("Authorization", "Bearer <token>");
extractRequest.AddHeader("Content-Type", "application/json");
extractRequest.AddParameter("application/json", jsonBody, ParameterType.RequestBody);

IRestResponse response = restClient.Execute(extractRequest);
```

```swift Extract_postV1ExtractPost_example
import Foundation

// Request headers: the API key is sent as a Bearer token.
let headers = [
  "Authorization": "Bearer <token>",
  "Content-Type": "application/json"
]

// `properties` must serialize to an empty JSON object (`{}`); an empty array
// literal would serialize to `[]`, so an empty dictionary is used instead.
let outputSchema: [String: Any] = [
  "type": "object",
  "properties": [String: Any]()
]
let parameters: [String: Any] = [
  "urls": ["https://www.anthropic.com/company"],
  "output_schema": outputSchema
]

// The endpoint is a constant, so failing to parse it is a programmer error.
guard let url = URL(string: "https://core.usescout.sh/v1/extract") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers

// JSONSerialization.data(withJSONObject:options:) throws, so it must be
// called with `try`.
do {
  request.httpBody = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Failed to encode request body: \(error)")
}

// Send the request; the handler prints either the transport error or the
// HTTP response followed by the response body.
let dataTask = URLSession.shared.dataTask(with: request) { data, response, error in
  if let error = error {
    print(error)
    return
  }
  if let httpResponse = response as? HTTPURLResponse {
    print(httpResponse)
  }
  if let data = data, let body = String(data: data, encoding: .utf8) {
    print(body)
  }
}

// NOTE: the task runs asynchronously; when running this as a command-line
// script, keep the process alive until the completion handler has fired.
dataTask.resume()
```