OSINT Platform

Guides and know-how

How to use the Spec Format

OSINT Industries' spec format standardizes data across all modules. This makes it simple to integrate the results directly into your platform without any hassle and without having to re-parse the whole response. This short tutorial explains how easy the spec format is to use in JavaScript and how you can integrate it into your platform.

To start, let's assume this is the response you received:

[
    {
        "module": "dropbox",
        "data": {
            "registered": true
        },
        "front_schemas": [
            {
                "module": "Dropbox",
                "body": {},
                "timeline": {
                    "last_seen": true,
                    "registered": true,
                    "last_seen_date": null,
                    "registered_date": null,
                    "groups": {},
                    "group_items": {},
                    "group_years": {}
                },
                "tags": []
            }
        ],
        "spec_format": [
            {
                "registered": {
                    "proper_key": "Registered",
                    "value": true,
                    "type": "bool"
                },
                "platform_variables": []
            }
        ],
        "status": "found",
        "query": "[email protected]",
        "from": "User supplied email.",
        "reliable_source": true
    },
    {
        "module": "duolingo",
        "data": {
            "users": [
                {
                    "joinedClassroomIds": [],
                    "streak": 0,
                    "motivation": "none",
                    "acquisitionSurveyReason": "none",
                    "shouldForceConnectPhoneNumber": false,
                    "picture": "//simg-ssl.duolingo.com/avatar/default_2",
                    "learningLanguage": "es",
                    "shakeToReportEnabled": null,
                    "liveOpsFeatures": [],
                    "canUseModerationTools": false,
                    "id": 2699322,
                    "betaStatus": "INELIGIBLE",
                    "hasGoogleId": false,
                    "privacySettings": [],
                    "fromLanguage": "en",
                    "hasRecentActivity15": false,
                    "_achievements": [],
                    "observedClassroomIds": [],
                    "username": "jj17686",
                    "bio": "",
                    "profileCountry": null,
                    "chinaUserModerationRecords": [],
                    "globalAmbassadorStatus": {},
                    "currentCourseId": "DUOLINGO_ES_EN",
                    "hasPhoneNumber": false,
                    "creationDate": 1365014226,
                    "achievements": [],
                    "hasPlus": false,
                    "hasFacebookId": false,
                    "roles": [
                        "users"
                    ],
                    "classroomLeaderboardsEnabled": false,
                    "emailVerified": false,
                    "courses": [
                        {
                            "preload": false,
                            "placementTestAvailable": true,
                            "authorId": "duolingo",
                            "title": "Spanish",
                            "learningLanguage": "es",
                            "xp": 0,
                            "healthEnabled": true,
                            "fromLanguage": "en",
                            "crowns": 9999,
                            "id": "DUOLINGO_ES_EN"
                        }
                    ],
                    "totalXp": 0,
                    "streakData": {
                        "currentStreak": null
                    }
                }
            ]
        },
        "front_schemas": [
            {
                "module": "Duolingo",
                "image": "https://simg-ssl.duolingo.com/avatar/default_2/xlarge",
                "body": {
                    "Username": "jj17686",
                    "Learning": "ES from EN",
                    "Premium": "False",
                    "Email Verified": "False",
                    "Has Phone Number": "False",
                    "Total XP": "0",
                    "Created At": "2013-04-03 18:37:06",
                    "ID": "2699322"
                },
                "tags": [
                    {
                        "tag": "Learning Spanish (0 XP)"
                    }
                ],
                "timeline": {
                    "last_seen": true,
                    "registered": true,
                    "last_seen_date": null,
                    "registered_date": "2013-04-03T18:37:06",
                    "groups": {},
                    "group_items": {},
                    "group_years": {}
                }
            }
        ],
        "spec_format": [
            {
                "registered": {
                    "proper_key": "Registered",
                    "value": true,
                    "type": "bool"
                },
                "id": {
                    "proper_key": "Id",
                    "value": "2699322",
                    "type": "str"
                },
                "picture_url": {
                    "proper_key": "Picture Url",
                    "value": "https://simg-ssl.duolingo.com/avatar/default_2/xlarge",
                    "type": "str"
                },
                "username": {
                    "proper_key": "Username",
                    "value": "jj17686",
                    "type": "str"
                },
                "profile_url": {
                    "proper_key": "Profile Url",
                    "value": "https://www.duolingo.com/profile/jj17686",
                    "type": "str"
                },
                "premium": {
                    "proper_key": "Premium",
                    "value": false,
                    "type": "bool"
                },
                "creation_date": {
                    "proper_key": "Creation Date",
                    "value": "2013-04-03T18:37:06",
                    "type": "datetime"
                },
                "platform_variables": [
                    {
                        "key": "learning_language",
                        "proper_key": "Learning Language",
                        "value": "es",
                        "type": "str"
                    },
                    {
                        "key": "from_language",
                        "proper_key": "From Language",
                        "value": "en",
                        "type": "str"
                    },
                    {
                        "key": "email_verified",
                        "proper_key": "Email Verified",
                        "value": false,
                        "type": "bool"
                    },
                    {
                        "key": "has_phone_number",
                        "proper_key": "Has Phone Number",
                        "value": false,
                        "type": "bool"
                    },
                    {
                        "key": "motivation",
                        "proper_key": "Motivation",
                        "value": "none",
                        "type": "str"
                    },
                    {
                        "key": "roles",
                        "proper_key": "Roles",
                        "value": [
                            "users"
                        ],
                        "type": "list"
                    },
                    {
                        "key": "total_xp",
                        "proper_key": "Total Xp",
                        "value": 0,
                        "type": "int"
                    },
                    {
                        "key": "streak",
                        "proper_key": "Streak",
                        "value": 0,
                        "type": "int"
                    },
                    {
                        "key": "global_ambassador_status",
                        "proper_key": "Global Ambassador Status",
                        "value": {},
                        "type": "dict"
                    },
                    {
                        "key": "current_course_id",
                        "proper_key": "Current Course Id",
                        "value": "DUOLINGO_ES_EN",
                        "type": "str"
                    },
                    {
                        "key": "streak_data",
                        "proper_key": "Streak Data",
                        "value": {
                            "currentStreak": null
                        },
                        "type": "dict"
                    },
                    {
                        "key": "courses",
                        "proper_key": "Courses",
                        "value": [
                            {
                                "preload": false,
                                "placementTestAvailable": true,
                                "authorId": "duolingo",
                                "title": "Spanish",
                                "learningLanguage": "es",
                                "xp": 0,
                                "healthEnabled": true,
                                "fromLanguage": "en",
                                "crowns": 9999,
                                "id": "DUOLINGO_ES_EN"
                            }
                        ],
                        "type": "list"
                    }
                ]
            }
        ],
        "status": "found",
        "query": "[email protected]",
        "from": "User supplied email.",
        "reliable_source": true
    }
]

Let's start by printing each response object's spec format into the console to see what it returns:

// Print the raw `spec_format` list of every module response.
jsonResponse.forEach(dataObj => {
    const specFormatList = dataObj.spec_format
    console.log(specFormatList)
})

You may notice that each spec_format is a list. This is expected: while most modules return a single profile, some return several, so the spec format is always delivered as a list.

We will now loop through this list to see individual spec format objects:

// Walk every module response and print each entry of its `spec_format` list on its own.
jsonResponse.forEach(({ spec_format: specFormatList }) => {
    specFormatList.forEach(specFormat => console.log(specFormat))
})

Now you'll see something that resembles an object with clear keys and values that are denoted specifically. We standardise the following variables:

Attribute           Type       Notes
registered          bool       Always true
platform_variables  object     An identical object to the spec format that contains key value pairs of attributes that could not be standardized.
breach              bool       Only relevant for HIBP. OSINT Industries does not provide breach data.
id                  str | int
name                str
first_name          str
last_name           str
picture_url         str
gender              str
age                 str
language            str
location            str
username            str
profile_url         str
banner_url          str
email               str        May be a string list of values separated by a ", ".
phone               str        May be a string list of values separated by a ", ".
email_hint          str        May be a string list of values separated by a ", ".
phone_hint          str        May be a string list of values separated by a ", ".
website             str
bio                 str
followers           int
following           int
verified            bool
premium             bool
private             bool
devices             str
last_seen           str        ISO 8601 format.
creation_date       str        ISO 8601 format.

Let's loop over the object now to obtain these values:

// Print every key of each spec format object next to the value stored under it.
jsonResponse.forEach(({ spec_format: specFormatList }) => {
    specFormatList.forEach(specFormat => {
        Object.entries(specFormat).forEach(([key, value]) => {
            console.log(key, value)
        })
    })
})

This is typically close to the last stage of integration. Your value variable will resemble something like this:

{
  proper_key: 'Profile Url',
  value: 'https://www.duolingo.com/profile/jj17686',
  type: 'str'
}

As you can see, we specify:

  • proper_key: to help you maintain clean names inside your platform
  • value: the value of the spec format key
  • type: the value's type

For this tutorial, we will set up an object and fill it with module responses. Inside each module response, we will store another object that contains a list of keys and their direct values.

Let's use the data attributes described above:

// Holds the parsed result: one entry per module name, each mapping a
// proper key (e.g. "Profile Url") to its plain value.
const parsedResponse = {}

jsonResponse.forEach(dataObj => {
    const specFormatList = dataObj.spec_format
    const moduleName = dataObj.module

    parsedResponse[moduleName] = {}

    specFormatList.forEach(specFormat => {
        for (const [key, value] of Object.entries(specFormat)) {
            // `platform_variables` is a list rather than a
            // { proper_key, value, type } descriptor, so reading
            // `proper_key` from it would store a stray "undefined" entry.
            if (key === 'platform_variables') {
                continue
            }
            parsedResponse[moduleName][value.proper_key] = value.value
        }
    })
})

Which will give us something akin to:

{
    "dropbox": {
        "Registered": true
    },
    "duolingo": {
        "Registered": true,
        "Id": "2699322",
        "Picture Url": "https://simg-ssl.duolingo.com/avatar/default_2/xlarge",
        "Username": "jj17686",
        "Profile Url": "https://www.duolingo.com/profile/jj17686",
        "Premium": false,
        "Creation Date": "2013-04-03T18:37:06"
    }
}

But that's not all. We still haven't parsed platform_variables, which contains very crucial data. Let's make an exception for it and parse it accordingly:

// Holds the parsed result: one entry per module name, each mapping a
// proper key (e.g. "Profile Url") to its plain value.
const parsedResponse = {}

jsonResponse.forEach(dataObj => {
    const specFormatList = dataObj.spec_format
    const moduleName = dataObj.module

    parsedResponse[moduleName] = {}

    specFormatList.forEach(specFormat => {
        for (const [key, value] of Object.entries(specFormat)) {
            if (key === 'platform_variables') {
                // Platform variables come as a list of descriptors that each
                // carry their own proper_key, value and type.
                value.forEach(platformVariable => {
                    // Only str, int and float values are copied; any other
                    // type (e.g. list, dict, bool) is skipped.
                    if (['str', 'int', 'float'].includes(platformVariable.type)) {
                        parsedResponse[moduleName][platformVariable.proper_key] = platformVariable.value
                    }
                })
            } else {
                // Standardized attribute: a single { proper_key, value, type } descriptor.
                parsedResponse[moduleName][value.proper_key] = value.value
            }
        }
    })
})

Let's go over the changes we made:

  1. Check for the platform_variables key.
  2. If the check passes, loop through its values.
  3. Check if the type of the value is str, int or float. (Platform variables are not as controlled as other attributes in the spec format, so some of them might be lists or dictionaries; to keep this guide simple, we will only parse string, int and float values.)
  4. Add the platform variable's proper key to our parsedResponse object with its value.

Our results now look like this:

{
    "dropbox": {
        "Registered": true
    },
    "duolingo": {
        "Registered": true,
        "Id": "2699322",
        "Picture Url": "https://simg-ssl.duolingo.com/avatar/default_2/xlarge",
        "Username": "jj17686",
        "Profile Url": "https://www.duolingo.com/profile/jj17686",
        "Premium": false,
        "Creation Date": "2013-04-03T18:37:06",
        "Learning Language": "es",
        "From Language": "en",
        "Motivation": "none",
        "Total Xp": 0,
        "Streak": 0,
        "Current Course Id": "DUOLINGO_ES_EN"
    }
}

We have successfully transformed the response into something we can easily manipulate and integrate into our own platform.