On dirait le même système que la demande de @Mosda95
ici :
Bonjour la communauté,
J’ai une problématique sur le scraping de ce site : https://global-industrie.com/fr/liste-des-exposants
J’ai tenté de scraper le site via Octoparse, mais je n’arrive pas à entrer à l’intérieur d’une page. Je pense avoir compris le problème : le XPath relatif semble être dynamique.
Quelqu’un a-t-il déjà eu cette problématique ou faut-il passer par du Python ?
Merci d’avance pour vos retours.
Boucler sur cette requête (en faisant avancer la variable « endCursor » à chaque itération) pour avoir la liste des exposants :
curl "https://api.swapcard.com/graphql" -X POST -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0" -H "Accept: */*" -H "Accept-Language: fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3" -H "Accept-Encoding: gzip, deflate, br" -H "Referer: https://online.sival-angers.com/" -H "content-type: application/json" -H "x-client-version: 2.241.4" -H "x-client-origin: online.sival-angers.com" -H "X-Content-Language: fr_FR" -H "Origin: https://online.sival-angers.com" -H "Connection: keep-alive" -H "Sec-Fetch-Dest: empty" -H "Sec-Fetch-Mode: cors" -H "Sec-Fetch-Site: cross-site" -H "Pragma: no-cache" -H "Cache-Control: no-cache" -H "TE: trailers" --data-raw "^[{""operationName"":""EventExhibitorListViewConnectionQuery"",""variables"":{""withEvent"":true,""viewId"":""RXZlbnRWaWV3XzM1OTI5Mw=="",""eventId"":""RXZlbnRfOTMwODA2"",""selectedFilters"":^[{""mustEventFiltersIn"":^[{""filterId"":""RmllbGREZWZpbml0aW9uXzIzMDQ3Ng=="",""values"":^[""RmllbGRWYWx1ZV8xNDg3NTU5MA==""^]}^]}^],""endCursor"":""WzAuMDA1MDY1OTE4LCJpcmlzb2xhcmlzIl0=""},""extensions"":{""persistedQuery"":{""version"":1,""sha256Hash"":""b7d34d371267414bb97bcb6561e50248d030523308a211765fe3f69f4b506569""}}}^]"
Puis, pour chaque exposant, lancer cette requête pour récupérer ses données :
curl "https://api.swapcard.com/graphql" -X POST -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0" -H "Accept: */*" -H "Accept-Language: fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3" -H "Accept-Encoding: gzip, deflate, br" -H "Referer: https://online.sival-angers.com/" -H "content-type: application/json" -H "x-client-version: 2.241.4" -H "x-client-origin: online.sival-angers.com" -H "X-Content-Language: fr_FR" -H "Origin: https://online.sival-angers.com" -H "Connection: keep-alive" -H "Sec-Fetch-Dest: empty" -H "Sec-Fetch-Mode: cors" -H "Sec-Fetch-Site: cross-site" -H "Pragma: no-cache" -H "Cache-Control: no-cache" -H "TE: trailers" --data-raw "^[{""operationName"":""ExhibitorDetailsBackgroundQuery"",""variables"":{""exhibitorId"":""RXhoaWJpdG9yXzU4OTMyOQ=="",""eventId"":""RXZlbnRfOTMwODA2""},""extensions"":{""persistedQuery"":{""version"":1,""sha256Hash"":""0ed9ac36d84a83480f9470e771b814d6b8d29d2183d08a6fc6a0dc834febb559""}}},{""operationName"":""EventExhibitorDetailsViewQuery"",""variables"":{""withEvent"":true,""skipMeetings"":true,""exhibitorId"":""RXhoaWJpdG9yXzU4OTMyOQ=="",""eventId"":""RXZlbnRfOTMwODA2""},""extensions"":{""persistedQuery"":{""version"":1,""sha256Hash"":""89f227d490052f65e58416ddfe79c84856de601e2543f6655c50412bb2ac8a5d""}}},{""operationName"":""SingleCommunityQuery"",""variables"":{},""extensions"":{""persistedQuery"":{""version"":1,""sha256Hash"":""0fbbcdbf8bde4a9b8986bb9982f3d875d0ffb56f8e742c28ec9e958cc2729f8c""}}},{""operationName"":""ApplicationProvider_CurrentCommunity"",""variables"":{""communitySlug"":""sival""},""extensions"":{""persistedQuery"":{""version"":1,""sha256Hash"":""fffeb9264349b09ff4570fb4b292157e7be02dd9e45463ff440c397915edeb29""}}}^]"
On obtient ce genre de JSON avec toutes les données :
[
{
"data": {
"exhibitor": {
"id": "RXhoaWJpdG9yXzU4OTMyOQ==",
"backgroundImageUrl": null,
"__typename": "Core_Exhibitor"
}
}
},
{
"data": {
"exhibitor": {
"id": "RXhoaWJpdG9yXzU4OTMyOQ==",
"name": "JOLLY",
"type": "Fournisseurs de Matériels, Produits et Services",
"logoUrl": "https://cdn-api.swapcard.com/public/images/f71b756b2ada47d1809c3b373c3f8bdd.png",
"websiteUrl": "https://www.etsjolly.com",
"email": "[email protected] ",
"exhibitorMemberRole": "ANONYMOUS",
"description": null,
"htmlDescription": null,
"backgroundImageUrl": null,
"withEvent": {
"canTalkTo": false,
"fields": [
{
"__typename": "Core_LongTextField",
"id": "RmllbGREZWZpbml0aW9uXzI2MjMzNw==",
"name": "Activité",
"placeholder": null,
"isEditable": true,
"value": {
"id": "RmllbGRWYWx1ZV8xNDk4MTMyNQ==",
"longText": "Vente et réparation de matériels horticoles et pépinières, neuf et d'occasion.",
"__typename": "Core_LongTextFieldValue"
},
"section": null
},
{
"__typename": "Core_SelectField",
"id": "RmllbGREZWZpbml0aW9uXzQ2MDkyOQ==",
"name": "Prise de rdv en ligne",
"placeholder": null,
"isEditable": false,
"value": null,
"section": null
},
{
"__typename": "Core_LongTextField",
"id": "RmllbGREZWZpbml0aW9uXzIzMDQ4MA==",
"name": "Marques",
"placeholder": null,
"isEditable": false,
"value": {
"id": "RmllbGRWYWx1ZV8xNDk2Mzc5Mg==",
"longText": "WaterWick-Prins-Empas-Lommers-Willburg-Viscon Aimfresh-EazyCut-Mosa-Visser-Mayer",
"__typename": "Core_LongTextFieldValue"
},
"section": null
},
{
"__typename": "Core_SelectField",
"id": "RmllbGREZWZpbml0aW9uXzM5NTg4OA==",
"name": "Hall",
"placeholder": null,
"isEditable": false,
"value": {
"id": "RmllbGRWYWx1ZV8xNzIxOTE3Ng==",
"text": "ARDESIA",
"__typename": "Core_SelectFieldValue"
},
"section": null
},
{
"__typename": "Core_MultipleSelectField",
"id": "RmllbGREZWZpbml0aW9uXzIzMDQ3Mg==",
"name": "Filières concernées",
"placeholder": null,
"isEditable": true,
"values": [
{
"text": "Arboriculture",
"id": "RmllbGRWYWx1ZV8xNTAwMzYxNA==",
"__typename": "Core_SelectFieldValue"
},
{
"text": "Culture du Champignon",
"id": "RmllbGRWYWx1ZV8xNTAwMzYxNg==",
"__typename": "Core_SelectFieldValue"
},
{
"text": "Cultures Légumières",
"id": "RmllbGRWYWx1ZV8xNTAwMzYxMg==",
"__typename": "Core_SelectFieldValue"
},
{
"text": "Horticulture et Pépinières",
"id": "RmllbGRWYWx1ZV8xNzIyMzE3Mw==",
"__typename": "Core_SelectFieldValue"
}
],
"section": {
"id": "RmllbGREZWZpbml0aW9uU2VjdGlvbl8yNjkyMw==",
"name": "Champs d'activité",
"__typename": "Core_FieldDefinitionSection"
}
},
{
"__typename": "Core_TreeField",
"id": "RmllbGREZWZpbml0aW9uXzI5MDczNA==",
"name": "Champs d'activité",
"placeholder": null,
"isEditable": true,
"values": [
{
"id": "RmllbGRWYWx1ZV8xNDkzODMzMw==",
"path": [
{
"text": "Plantation",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Matériels pour pépinières et plantations",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNDkzODMzNA==",
"path": [
{
"text": "Protection",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Phytosanitaires, désinfection",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNDkzODMzOA==",
"path": [
{
"text": "Transport / Logistique",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Transport, manutention",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNDk1ODk0Mw==",
"path": [
{
"text": "Conditionnement / Emballage",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Calibrage",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTM0NQ==",
"path": [
{
"text": "Matériels, produits et services multisectoriels",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "AgTech, nouvelles technologies",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTM2Ng==",
"path": [
{
"text": "Conditionnement / Emballage",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Emballage, contenant",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTM3Ng==",
"path": [
{
"text": "Plantation",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Matériels de mise en culture",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTM3Nw==",
"path": [
{
"text": "Conditionnement / Emballage",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Lavage, liage, pesage",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTQ1Mw==",
"path": [
{
"text": "Protection",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Matériels de protection des cultures",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTUzNA==",
"path": [
{
"text": "Culture / Entretien",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Engrais et amendements",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTUzNQ==",
"path": [
{
"text": "Plantation",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Supports de culture",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTc0Mw==",
"path": [
{
"text": "Culture / Entretien",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Matériels pour la taille et l'entretien des cultures",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
},
{
"id": "RmllbGRWYWx1ZV8xNTAxMTk5OQ==",
"path": [
{
"text": "Conditionnement / Emballage",
"__typename": "Core_TreePathTranslationValue"
},
{
"text": "Machine de conditionnement",
"__typename": "Core_TreePathTranslationValue"
}
],
"__typename": "Core_TreeFieldValue"
}
],
"section": {
"id": "RmllbGREZWZpbml0aW9uU2VjdGlvbl8yNjkyMw==",
"name": "Champs d'activité",
"__typename": "Core_FieldDefinitionSection"
}
},
{
"__typename": "Core_MultipleSelectField",
"id": "RmllbGREZWZpbml0aW9uXzIzMDQ3Ng==",
"name": "Zone de chalandise (Pays et / ou continents)",
"placeholder": null,
"isEditable": true,
"values": [
{
"text": "France",
"id": "RmllbGRWYWx1ZV8xNDg3NTU5MA==",
"__typename": "Core_SelectFieldValue"
}
],
"section": {
"id": "RmllbGREZWZpbml0aW9uU2VjdGlvbl8yNTIzNg==",
"name": "Marchés",
"__typename": "Core_FieldDefinitionSection"
}
},
{
"__typename": "Core_SelectField",
"id": "RmllbGREZWZpbml0aW9uXzIzMDQ3OQ==",
"name": "Export",
"placeholder": null,
"isEditable": true,
"value": {
"id": "RmllbGRWYWx1ZV8xNDMwODc3Ng==",
"text": "Présent uniquement sur le marché français",
"__typename": "Core_SelectFieldValue"
},
"section": {
"id": "RmllbGREZWZpbml0aW9uU2VjdGlvbl8yNTIzNg==",
"name": "Marchés",
"__typename": "Core_FieldDefinitionSection"
}
},
{
"__typename": "Core_LongTextField",
"id": "RmllbGREZWZpbml0aW9uXzIzMDQ2OQ==",
"name": "Certifications et démarches environnementales",
"placeholder": "Décrivez vos certifications et / ou démarches environnementales en cours ou mises en place dans votre entreprise",
"isEditable": true,
"value": null,
"section": {
"id": "RmllbGREZWZpbml0aW9uU2VjdGlvbl8yNTIzNw==",
"name": "Innovation, environnement, société",
"__typename": "Core_FieldDefinitionSection"
}
},
{
"__typename": "Core_MultipleSelectField",
"id": "RmllbGREZWZpbml0aW9uXzI1NzIxMQ==",
"name": "Domaines d'innovation",
"placeholder": null,
"isEditable": true,
"values": [
{
"text": "Robotique, électronique, machinisme et automatisme",
"id": "RmllbGRWYWx1ZV8xNTMxODQzNQ==",
"__typename": "Core_SelectFieldValue"
},
{
"text": "Solutions pour la production",
"id": "RmllbGRWYWx1ZV8xNTMxODQzMA==",
"__typename": "Core_SelectFieldValue"
}
],
"section": {
"id": "RmllbGREZWZpbml0aW9uU2VjdGlvbl8yNTIzNw==",
"name": "Innovation, environnement, société",
"__typename": "Core_FieldDefinitionSection"
}
}
],
"editableFields": {
"name": false,
"logoUrl": true,
"description": false,
"address": true,
"websiteUrl": true,
"type": false,
"socialNetworks": true,
"phoneNumbers": true,
"booth": true,
"email": true,
"bannerImage": true,
"bannerVideo": false,
"advertisements": true,
"backgroundImage": true,
"__typename": "Core_EditableExhibitorFields"
},
"booths": [
{
"id": "TWVldGluZ1BsYWNlXzQ5MjQ2OA==",
"category": "ARDESIA - Allée",
"name": "D-162",
"__typename": "Core_Location"
}
],
"__typename": "Core_ExhibitorWithEvent",
"isBookmarked": null,
"advertisements": [],
"products": {
"subcategories": {
"categories": [],
"__typename": "Core_EventProductCategoriesResult"
},
"__typename": "Core_EventProducts"
},
"similarExhibitors": []
},
"address": {
"street": "Route de Beaufort",
"zipCode": "49800",
"place": null,
"country": "FR",
"city": "BRAIN SUR L'AUTHION",
"state": null,
"__typename": "Core_Address"
},
"socialNetworks": [
{
"profile": "ets.jolly",
"type": "FACEBOOK",
"__typename": "Core_SocialNetwork"
},
{
"profile": "ETS_JOLLY",
"type": "TWITTER",
"__typename": "Core_SocialNetwork"
},
{
"profile": "ets-jolly",
"type": "LINKEDIN",
"__typename": "Core_SocialNetwork"
},
{
"profile": "ets.jolly",
"type": "INSTAGRAM",
"__typename": "Core_SocialNetwork"
}
],
"phoneNumbers": [
{
"type": "LANDLINE",
"formattedNumber": "0241804034",
"__typename": "Core_PhoneNumber"
},
{
"type": "MOBILE",
"formattedNumber": null,
"__typename": "Core_PhoneNumber"
}
],
"banner": {
"imageUrl": "https://cdn-api.swapcard.com/public/images/6841ca030d1a409b88c70e9ea84ffe6e.png",
"embeddedVideo": null,
"__typename": "Core_Banner"
},
"__typename": "Core_Exhibitor",
"documents": [
{
"id": "RG9jdW1lbnRfNDc0MDM4",
"name": "https://www.etsjolly.com/",
"url": "https://www.etsjolly.com/",
"description": "https://www.etsjolly.com/",
"type": "LINK",
"__typename": "Core_Document"
}
],
"isBookmarked": null
},
"linkedExhibitors": {
"pageInfo": {
"hasNextPage": false,
"endCursor": null,
"__typename": "Core_PageInfo"
},
"totalCount": 0,
"nodes": [],
"__typename": "Core_ExhibitorsConnection"
},
"members": {
"totalCount": 0,
"nodes": [],
"pageInfo": {
"hasNextPage": false,
"endCursor": null,
"__typename": "Core_PageInfo"
},
"__typename": "Core_EventPeopleConnection"
},
"membersAsAdmin": {
"totalCount": 0,
"nodes": [],
"pageInfo": {
"hasNextPage": false,
"endCursor": null,
"__typename": "Core_PageInfo"
},
"__typename": "Core_ExhibitorMembersConnection"
},
"plannings": {
"nodes": [],
"pageInfo": {
"totalEdges": 0,
"hasNextPage": false,
"nextCursor": null,
"__typename": "Core_PageInfoType"
},
"__typename": "Core_EdgeListPlanning"
},
"onDemandPlannings": {
"nodes": [],
"pageInfo": {
"totalEdges": 0,
"hasNextPage": false,
"nextCursor": null,
"__typename": "Core_PageInfoType"
},
"__typename": "Core_EdgeListPlanning"
},
"productCategories": [
{
"totalLimit": 3,
"productCount": 0,
"category": {
"id": "UHJvZHVjdENhdGVnb3J5XzUwNTkx",
"name": "Matériels, produits et services",
"__typename": "Core_ProductCategory"
},
"__typename": "Core_ExhibitorRootProductCategory"
},
{
"totalLimit": 3,
"productCount": 0,
"category": {
"id": "UHJvZHVjdENhdGVnb3J5XzU1MTY3",
"name": "Images",
"__typename": "Core_ProductCategory"
},
"__typename": "Core_ExhibitorRootProductCategory"
},
{
"totalLimit": 0,
"productCount": 0,
"category": {
"id": "UHJvZHVjdENhdGVnb3J5XzY2NDYx",
"name": "Répertoire BIO",
"__typename": "Core_ProductCategory"
},
"__typename": "Core_ExhibitorRootProductCategory"
},
{
"totalLimit": 0,
"productCount": 0,
"category": {
"id": "UHJvZHVjdENhdGVnb3J5XzY2NDYy",
"name": "Offres d'emploi",
"__typename": "Core_ProductCategory"
},
"__typename": "Core_ExhibitorRootProductCategory"
},
{
"totalLimit": 0,
"productCount": 0,
"category": {
"id": "UHJvZHVjdENhdGVnb3J5XzY2ODk3",
"name": "Offres Destination Angers",
"__typename": "Core_ProductCategory"
},
"__typename": "Core_ExhibitorRootProductCategory"
},
{
"totalLimit": null,
"productCount": 0,
"category": {
"id": "UHJvZHVjdENhdGVnb3J5XzExNDg2OA==",
"name": "Nos actions RSE",
"__typename": "Core_ProductCategory"
},
"__typename": "Core_ExhibitorRootProductCategory"
}
]
}
},
{
"data": {
"communities": {
"totalCount": 1,
"nodes": [
{
"id": "Q29tbXVuaXR5XzIxNzQw",
"slug": "sival",
"events": {
"totalCount": 0,
"nodes": [],
"__typename": "Core_EventsConnection"
},
"suggestedEvents": {
"totalCount": 1,
"nodes": [
{
"id": "RXZlbnRfOTMwODA2",
"slug": "sival-2023",
"__typename": "Core_Event"
}
],
"__typename": "Core_EventsConnection"
},
"__typename": "Core_Community"
}
],
"__typename": "Core_CommunitiesConnection"
}
}
},
{
"data": {
"communities": {
"nodes": [
{
"id": "Q29tbXVuaXR5XzIxNzQw",
"slug": "sival",
"name": "SIVAL",
"title": "SIVAL Online - L'app Web et Mobile du SIVAL",
"logoUrl": "https://cdn-api.swapcard.com/public/images/b3436ebc1adf4c9096b5aab380b2dcf2.png",
"bannerImageUrl": "https://cdn-api.swapcard.com/public/images/1ba5c7392fb14dccbf6d5017dec6e685.png",
"themes": [
{
"theme": {
"id": "VGhlbWVfMTY1Mzc=",
"mode": null,
"textColor": null,
"primaryColor": null,
"buttonsColor": "#2c3786",
"backgroundColor": null,
"containerColor": null,
"backgroundImageUrl": "https://cdn-api.swapcard.com/public/images/56f9b1044e804aca91aeca6efb909544.png",
"__typename": "Core_Theme"
},
"__typename": "Core_ThemeLink"
}
],
"__typename": "Core_Community",
"contents": {
"views": [],
"__typename": "Core_CommunityContents"
}
}
],
"__typename": "Core_CommunitiesConnection"
}
}
}
]