> For clean Markdown of any page, append .md to the page URL.
> For a complete documentation index, see https://docs.usescout.sh/llms.txt.
> For full documentation content, see https://docs.usescout.sh/llms-full.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.usescout.sh/_mcp/server.

# Extract a URL

GET https://core.usescout.sh/v1/extract

Reference: https://docs.usescout.sh/api-reference/scout/extract/get-v-1-extract-get

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: Scout
  version: 1.0.0
paths:
  /v1/extract:
    get:
      operationId: get-v-1-extract-get
      summary: Extract a URL
      tags:
        - subpackage_extract
      parameters:
        - name: url
          in: query
          required: true
          schema:
            type: string
        - name: max_chars
          in: query
          required: false
          schema:
            type:
              - integer
              - 'null'
        - name: Authorization
          in: header
          description: Your API key, sent as a Bearer token.
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
servers:
  - url: https://core.usescout.sh
components:
  schemas:
    ExtractSubpage:
      type: object
      properties:
        url:
          type: string
          description: The subpage URL that was extracted.
        title:
          type: string
          default: ''
          description: The subpage title.
        excerpts:
          type: array
          items:
            type: string
          description: Excerpts from the subpage.
        relevance_score:
          type:
            - number
            - 'null'
          format: double
          description: >-
            0..1 score from the objective-aware ranker (null when the ranker did
            not run for this candidate).
        discovered_via:
          type:
            - string
            - 'null'
          description: 'How the subpage was discovered: "html", "slug_guess" or "sitemap".'
      required:
        - url
      title: ExtractSubpage
    ExtractExtras:
      type: object
      properties:
        links:
          type: array
          items:
            type: string
          description: Outbound link URLs from the page (capped to extras.links).
        images:
          type: array
          items:
            type: string
          description: Image URLs from the page (capped to extras.images).
      title: ExtractExtras
    ExtractResult:
      type: object
      properties:
        url:
          type: string
          description: The URL that was extracted.
        title:
          type: string
          default: ''
          description: The page title.
        publish_date:
          type:
            - string
            - 'null'
          description: Publication date (YYYY-MM-DD), parsed from page meta.
        author:
          type:
            - string
            - 'null'
          description: Author, parsed from page meta or JSON-LD.
        image:
          type:
            - string
            - 'null'
          description: og:image URL, when present.
        favicon:
          type:
            - string
            - 'null'
          description: Page favicon URL (absolute).
        excerpts:
          type: array
          items:
            type: string
          description: >-
            Focused excerpts (driven by `objective` / `search_queries` when
            supplied; otherwise default paragraph sample).
        full_content:
          type:
            - string
            - 'null'
          description: >-
            Full page markdown - only when `advanced_settings.full_content` is
            truthy.
        summary:
          description: >-
            Summary - string if no schema was supplied, structured object if
            `summary.schema` was supplied; null if `summary` was not requested.
        subpages:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/ExtractSubpage'
          description: Per-subpage results when `advanced_settings.subpages > 0`.
        extras:
          oneOf:
            - $ref: '#/components/schemas/ExtractExtras'
            - type: 'null'
          description: Extra page side-data (links / images), when requested.
        content:
          type: string
          default: ''
          description: >-
            DEPRECATED alias - copies `full_content` (or joined `excerpts` when
            full_content was not requested).
        data:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: DEPRECATED alias of `summary` when `summary` is a structured object.
        error:
          type:
            - string
            - 'null'
          description: >-
            DEPRECATED - per-URL failures now live in the top-level `errors[]`;
            this stays for old clients.
      required:
        - url
      title: ExtractResult
    ExtractError:
      type: object
      properties:
        url:
          type: string
          description: The URL that failed.
        error_type:
          type: string
          description: >-
            One of: fetch_error, parse_error, timeout, blocked,
            validation_error.
        http_status_code:
          type:
            - integer
            - 'null'
          description: Upstream HTTP status when known.
        detail:
          type: string
          default: ''
          description: Human-readable failure detail.
      required:
        - url
        - error_type
      title: ExtractError
    ExtractStatusStatus:
      type: string
      enum:
        - success
        - error
      description: success or error.
      title: ExtractStatusStatus
    ExtractStatusSource:
      type: string
      enum:
        - cached
        - live
      default: live
      description: Whether the page came from cache or a live fetch.
      title: ExtractStatusSource
    ExtractStatus:
      type: object
      properties:
        url:
          type: string
          description: The URL.
        status:
          $ref: '#/components/schemas/ExtractStatusStatus'
          description: success or error.
        source:
          $ref: '#/components/schemas/ExtractStatusSource'
          description: Whether the page came from cache or a live fetch.
      required:
        - url
        - status
      title: ExtractStatus
    SearchUsageItem:
      type: object
      properties:
        name:
          type: string
          description: SKU name, e.g. `sku_search`.
        count:
          type: integer
          description: Units of this SKU consumed.
      required:
        - name
        - count
      title: SearchUsageItem
    ExtractResponse:
      type: object
      properties:
        extract_id:
          type: string
          description: Opaque id for this extract call.
        session_id:
          type: string
          description: Caller-supplied or generated.
        results:
          type: array
          items:
            $ref: '#/components/schemas/ExtractResult'
          description: One result per successfully extracted URL.
        errors:
          type: array
          items:
            $ref: '#/components/schemas/ExtractError'
          description: Per-URL failures (not present in `results`).
        statuses:
          type: array
          items:
            $ref: '#/components/schemas/ExtractStatus'
          description: Per-URL status row (success or error; cached or live).
        warnings:
          type:
            - array
            - 'null'
          items:
            type: string
          description: Reserved for future use.
        credits:
          type: integer
          description: >-
            Cost - 2 credits per URL successfully extracted, +1 per URL that hit
            the LLM (focused excerpts or summary).
        subpages_discovered:
          type: integer
          default: 0
          description: >-
            Total subpage candidates surfaced across all parent URLs before
            objective-aware ranking trimmed the list.
        subpages_extracted:
          type: integer
          default: 0
          description: Total subpages actually fetched and returned across all parent URLs.
        usage:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/SearchUsageItem'
          description: >-
            Parallel-style usage line items broken down by SKU (currently
            surfaces `sku_subpage_rank` when the objective-aware subpage ranker
            ran).
        scratchpad:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: >-
            Per-request scratchpad payload when SCRATCHPAD_FIRST=true and an
            objective was provided. Carries the single tool-use Claude answer
            plus retrieval stats.
      required:
        - extract_id
        - session_id
        - results
        - credits
      title: ExtractResponse
    ValidationErrorLocItems:
      oneOf:
        - type: string
        - type: integer
      title: ValidationErrorLocItems
    ValidationErrorCtx:
      type: object
      properties: {}
      title: ValidationErrorCtx
    ValidationError:
      type: object
      properties:
        loc:
          type: array
          items:
            $ref: '#/components/schemas/ValidationErrorLocItems'
        msg:
          type: string
        type:
          type: string
        input:
          description: Any type
        ctx:
          $ref: '#/components/schemas/ValidationErrorCtx'
      required:
        - loc
        - msg
        - type
      title: ValidationError
    HTTPValidationError:
      type: object
      properties:
        detail:
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
      title: HTTPValidationError
  securitySchemes:
    apiKey:
      type: http
      scheme: bearer
      description: Your API key, sent as a Bearer token.

```

## SDK Code Examples

```python Extract_getV1ExtractGet_example
import requests

# Endpoint for the extract call. The target page and max_chars go in the
# query string; `requests` URL-encodes the values passed via `params`.
url = "https://core.usescout.sh/v1/extract"

querystring = {"url": "https://www.anthropic.com/company", "max_chars": "1000"}

# The API key is sent as a Bearer token. A GET request carries no body, so
# no JSON payload or Content-Type header is sent.
headers = {"Authorization": "Bearer <token>"}

# `requests` has no default timeout; set one so the call cannot hang forever.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

print(response.json())
```

```javascript Extract_getV1ExtractGet_example
// The target page and max_chars are passed as URL-encoded query parameters.
const url = 'https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000';

// A GET request must not carry a body: fetch() throws a TypeError if one is
// set, so only the Authorization header is sent.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'}
};

try {
  const response = await fetch(url, options);
  const data = await response.json();
  console.log(data);
} catch (error) {
  console.error(error);
}
```

```go Extract_getV1ExtractGet_example
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

// main calls GET /v1/extract and prints the response status and body.
func main() {

	// The target page and max_chars are passed as URL-encoded query parameters.
	url := "https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000"

	// A GET request carries no body, so pass nil instead of an empty JSON payload.
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Fatal(err)
	}

	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails; stop before touching res.Body.
		log.Fatal(err)
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(res.Status)
	fmt.Println(string(body))

}
```

```ruby Extract_getV1ExtractGet_example
require 'uri'
require 'net/http'

# The target page and max_chars are passed as URL-encoded query parameters.
url = URI("https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# A GET request carries no body, so only the Authorization header is set.
request = Net::HTTP::Get.new(url)
request["Authorization"] = 'Bearer <token>'

response = http.request(request)
puts response.read_body
```

```java Extract_getV1ExtractGet_example
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// A GET request carries no body, so only the Authorization header is set.
// The target page and max_chars are passed as URL-encoded query parameters.
HttpResponse<String> response = Unirest.get("https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000")
  .header("Authorization", "Bearer <token>")
  .asString();
```

```php Extract_getV1ExtractGet_example
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

// A GET request carries no body; the target page and max_chars are passed as
// URL-encoded query parameters, and the API key as a Bearer token.
$response = $client->request('GET', 'https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000', [
  'headers' => [
    'Authorization' => 'Bearer <token>',
  ],
]);

echo $response->getBody();
```

```csharp Extract_getV1ExtractGet_example
using RestSharp;

// The target page and max_chars are passed as URL-encoded query parameters.
var client = new RestClient("https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000");
var request = new RestRequest(Method.GET);
// A GET request carries no body, so only the Authorization header is added.
request.AddHeader("Authorization", "Bearer <token>");
IRestResponse response = client.Execute(request);
```

```swift Extract_getV1ExtractGet_example
import Foundation

// The target page and max_chars are passed as URL-encoded query parameters.
guard let url = URL(string: "https://core.usescout.sh/v1/extract?url=https%3A%2F%2Fwww.anthropic.com%2Fcompany&max_chars=1000") else {
  fatalError("Invalid extract URL")
}

// A GET request carries no body, so only the method and the Authorization
// header (API key as a Bearer token) are set.
var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "GET"
request.setValue("Bearer <token>", forHTTPHeaderField: "Authorization")

let dataTask = URLSession.shared.dataTask(with: request) { data, response, error in
  // Report a transport-level failure and stop; there is no response to print.
  if let error = error {
    print(error)
    return
  }
  guard let httpResponse = response as? HTTPURLResponse else { return }
  print(httpResponse)
}

dataTask.resume()
```