All downloads are free. The search and download functionality uses the official Maven repository.

commonMain.com.mugames.vidsnapkit.extractor.LinkedIn.kt Maven / Gradle / Ivy

Go to download

Used to extract video urls from popular sites (Instagram, Facebook, DailyMotion, LinkedIn)

There is a newer version: 5.8.0
Show newest version
/*
 *    Copyright (c) 2023 Udhayarajan M
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *
 */

package com.mugames.vidsnapkit.extractor

import com.mugames.vidsnapkit.Util
import com.mugames.vidsnapkit.dataholders.*
import com.mugames.vidsnapkit.toJSONObject
import org.json.JSONArray
import java.util.regex.Pattern

/**
 * @author Udhaya
 * Created on 24-05-2022
 */

class LinkedIn internal constructor(url: String) : Extractor(url) {
    // Accumulates the result for this URL: analyze() sets url/src, scratchWebpage() fills
    // videoData and title before adding the object to videoFormats.
    private val formats = Formats()

    /**
     * Entry point of the extraction.
     *
     * Records the input URL and the source name on [formats], reports
     * [ProgressState.Start] through `onProgress`, downloads the page at `inputUrl`
     * and hands the response to [scratchWebpage]. When the request yields `null`,
     * `clientRequestError()` is invoked and the method returns without parsing.
     *
     * @param payload not read by this implementation.
     */
    override suspend fun analyze(payload: Any?) {
        formats.url = inputUrl
        formats.src = "LinkedIn"
        onProgress(Result.Progress(ProgressState.Start))

        val page = httpRequestService.getResponse(inputUrl, headers)
        if (page == null) {
            clientRequestError()
            return
        }
        scratchWebpage(page)
    }

    /**
     * Not implemented for this extractor: every call fails with the
     * [NotImplementedError] raised by [TODO].
     */
    override suspend fun testWebpage(string: String): Unit = TODO("Not yet implemented")

    /**
     * Parses the downloaded [page].
     *
     * When the page contains a `data-sources="..."` attribute, its HTML-decoded value is
     * read as a JSON array; each element contributes one [VideoResource] built from its
     * `src` and `type` fields plus the resolution derived from `src`. Title and thumbnail
     * are then collected, [formats] is added to `videoFormats` and `finalize()` is called.
     *
     * When the attribute is absent the page is passed to [brutForceWithQuotData] instead.
     */
    private suspend fun scratchWebpage(page: String) {
        val sourcesMatcher = Pattern.compile("data-sources=\"(.*?)\"").matcher(page)
        if (!sourcesMatcher.find()) {
            // No data-sources attribute: fall back to the alternative parser.
            brutForceWithQuotData(page)
            return
        }

        val sources = JSONArray(Util.decodeHTML(sourcesMatcher.group(1)))
        repeat(sources.length()) { index ->
            val entry = sources.getJSONObject(index)
            val src = entry.getString("src")
            val type = entry.getString("type")
            formats.videoData.add(VideoResource(src, type, getResolutionFromVideoUrl(src)))
        }

        formats.title = getTitle(page)
        getThumbnailData(page)
        videoFormats.add(formats)
        finalize()
    }

    /**
     * Extracts a title for the video from the meta tags of [page].
     *
     * The patterns are tried in this order: `twitter:title`, `og:title`,
     * `twitter:description`, `og:description`. The first pattern whose captured
     * `content` value is not blank wins; a tag with an empty or whitespace-only
     * value is skipped so that the next candidate gets a chance.
     *
     * @param page raw HTML of the page.
     * @return the captured text exactly as it appears in the page, or
     * `"LinkedIn_Video"` when no pattern yields a non-blank value.
     */
    private fun getTitle(page: String): String {
        val titlePatterns = listOf(
            Regex("\"twitter:title\"\\s*content\\s*=\\s*\"(.*?)\">"),
            Regex("\"og:title\"\\s*content\\s*=\\s*\"(.*?)\">"),
            Regex("\"twitter:description\"\\s*content\\s*=\\s*\"([\\w\\W]*?)\">"),
            Regex("\"og:description\"\\s*content\\s*=\\s*\"([\\w\\W]*?)\">"),
        )
        // Lazy sequence: later patterns are only evaluated when earlier ones give nothing usable.
        return titlePatterns.asSequence()
            .mapNotNull { pattern -> pattern.find(page)?.groupValues?.get(1) }
            .firstOrNull { candidate -> candidate.isNotBlank() }
            ?: "LinkedIn_Video"
    }

    // NOTE(review): the text of this function appears to be truncated/garbled in this copy.
    // The Pattern literal below contains an unescaped quote (possibly an HTML entity that was
    // decoded when the listing was captured - TODO confirm against the upstream file), the
    // `matcher` assignment is never completed, and the remaining lines use `thumbnailUrl`,
    // which is not declared in the visible text. scratchWebpage() calls getThumbnailData(page),
    // whose declaration is also not visible, so presumably the end of this function and the
    // start of getThumbnailData were lost. Restore both from the original source before
    // changing any logic here.
    private suspend fun brutForceWithQuotData(page: String) {
        val matcher =
            Pattern.compile("\\W*?(\\{(?:"|\")data.*\\})\\W*"),
            Regex("\"og:image\"\\s*content\\s*=\\s*\"(.*?)\">")
        )
        thumbnailUrl?.let {
            formats.imageData.add(ImageResource(it))
        }
    }

    /**
     * Reads the resolution label out of a video [url].
     *
     * The label is the text between the first `mp4-` and the next `-` that follows it.
     *
     * @return the captured label, or `"--"` when the URL does not contain that pattern.
     */
    private fun getResolutionFromVideoUrl(url: String): String =
        Regex("mp4-(.*?)-").find(url)?.groups?.get(1)?.value ?: "--"
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy