From 3940486c2fd0cc860dc63d8b76af4d945b50e69f Mon Sep 17 00:00:00 2001 From: Yassir LAIRGI Date: Tue, 17 Sep 2024 09:21:42 +0200 Subject: [PATCH] feat : example of how to run itext2kg with different llms and how to expand the graph --- examples/different_llm_models.ipynb | 572 ++++++++++++++++++++++++++++ 1 file changed, 572 insertions(+) create mode 100644 examples/different_llm_models.ipynb diff --git a/examples/different_llm_models.ipynb b/examples/different_llm_models.ipynb new file mode 100644 index 0000000..918ef30 --- /dev/null +++ b/examples/different_llm_models.ipynb @@ -0,0 +1,572 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "\n", + "import sys\n", + "sys.path.append(\"..\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, iText2KG is compatible with all language models supported by LangChain. \n", + "\n", + "To use iText2KG, you will need both a chat model and an embeddings model. \n", + "\n", + "For available chat models, refer to the options listed at: https://python.langchain.com/v0.2/docs/integrations/chat/. \n", + "For embedding models, explore the choices at: https://python.langchain.com/v0.2/docs/integrations/text_embedding/. \n", + "\n", + "This notebook will show you how to run iText2KG using Mistral, Ollama, and OpenAI models. \n", + "\n", + "**Please ensure that you install the necessary package for each chat model before use.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Mistral" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For Mistral, please set up your model using the tutorial here: https://python.langchain.com/v0.2/docs/integrations/chat/mistralai/. Similarly, for the embedding model, follow the setup guide here: https://python.langchain.com/v0.2/docs/integrations/text_embedding/mistralai/ ." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\yassir.lairgi\\Documents\\Projects\\iText2KG\\venv-test\\Lib\\site-packages\\langchain_mistralai\\embeddings.py:169: UserWarning: Could not download mistral tokenizer from Huggingface for calculating batch sizes. Set a Huggingface token via the HF_TOKEN environment variable to download the real tokenizer. Falling back to a dummy tokenizer that uses `len()`.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from langchain_mistralai import ChatMistralAI\n", + "from langchain_mistralai import MistralAIEmbeddings\n", + "\n", + "mistral_api_key = \"##\"\n", + "mistral_llm_model = ChatMistralAI(\n", + " api_key = mistral_api_key,\n", + " model=\"mistral-large-latest\",\n", + " temperature=0,\n", + " max_retries=2,\n", + ")\n", + "\n", + "\n", + "mistral_embeddings_model = MistralAIEmbeddings(\n", + " model=\"mistral-embed\",\n", + " api_key = mistral_api_key\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# OpenAI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same applies for OpenAI. \n", + "\n", + "please setup your model using the tutorial : https://python.langchain.com/v0.2/docs/integrations/chat/openai/\n", + "The same for embedding model : https://python.langchain.com/v0.2/docs/integrations/text_embedding/openai/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "\n", + "openai_api_key = \"##\"\n", + "\n", + "openai_llm_model = llm = ChatOpenAI(\n", + " api_key = openai_api_key,\n", + " model=\"gpt-4o\",\n", + " temperature=0,\n", + " max_tokens=None,\n", + " timeout=None,\n", + " max_retries=2,\n", + ")\n", + "\n", + "openai_embeddings_model = OpenAIEmbeddings(\n", + " api_key = openai_api_key ,\n", + " model=\"text-embedding-3-large\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ollama" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same applies for Ollama. \n", + "\n", + "please setup your model using the tutorial : https://python.langchain.com/v0.2/docs/integrations/chat/ollama/\n", + "The same for embedding model : https://python.langchain.com/v0.2/docs/integrations/text_embedding/openai/" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_ollama import ChatOllama, OllamaEmbeddings\n", + "\n", + "llm = ChatOllama(\n", + " model=\"llama3\",\n", + " temperature=0,\n", + ")\n", + "\n", + "embeddings = OllamaEmbeddings(\n", + " model=\"llama3\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# iText2KG" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Use Case: We aim to connect an online job description with a generated CV using Knowledge Graphs. \n", + "\n", + "* The objective is to assess the candidate's suitability for the job offer. You can utilize different LLM or embedding models for each module of iText2KG. However, it is important to ensure that the dimensions of node and relation embeddings are consistent across models. If the embedding dimensions differ, cosine similarity may struggle to accurately measure vector distances for further matching." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Document Distiller" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CV" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import PyPDFLoader\n", + "\n", + "loader = PyPDFLoader(f\"../datasets/cvs/CV_Emily_Davis.pdf\")\n", + "pages = loader.load_and_split()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from itext2kg.documents_distiller import DocumentsDisiller, CV\n", + "\n", + "\n", + "document_distiller = DocumentsDisiller(llm_model = openai_llm_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "IE_query = '''\n", + "# DIRECTIVES : \n", + "- Act like an experienced information extractor. \n", + "- You have a chunk of a CV.\n", + "- If you do not find the right information, keep its place empty.\n", + "'''\n", + "# we have replaced the curly braces with square brackets to avoid the error in the query\n", + "distilled_cv = document_distiller.distill(documents=[page.page_content.replace(\"{\", '[').replace(\"}\", \"]\") for page in pages], IE_query=IE_query, output_data_structure=CV)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "semantic_blocks_cv = [f\"{key} - {value}\".replace(\"{\", \"[\").replace(\"}\", \"]\") for key, value in distilled_doc.items() if value !=[] and value != \"\" and value != None]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Job description" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "job_offer = \"\"\" \n", + "About the Job Offer\n", + "THE FICTITIOUS COMPANY\n", + "\n", + "FICTITIOUS COMPANY is a high-end French fashion brand known for its graphic and poetic style, driven by the values of authenticity and transparency upheld by its creator Simon Porte Jacquemus.\n", + "\n", + "Your Role\n", + "\n", + "Craft visual stories that captivate, inform, and inspire. Transform concepts and ideas into visual representations. As a member of the studio, in collaboration with the designers and under the direction of the Creative Designer, you should be able to take written or spoken ideas and convert them into designs that resonate. You need to have a deep understanding of the brand image and DNA, being able to find the style and layout suited to each project.\n", + "\n", + "Your Missions\n", + "\n", + "Translate creative direction into high-quality silhouettes using Photoshop\n", + "Work on a wide range of projects to visualize and develop graphic designs that meet each brief\n", + "Work independently as well as in collaboration with the studio team to meet deadlines, potentially handling five or more projects simultaneously\n", + "Develop color schemes and renderings in Photoshop, categorized by themes, subjects, etc.\n", + "Your Profile\n", + "\n", + "Bachelor’s degree (Bac+3/5) in Graphic Design or Art\n", + "3 years of experience in similar roles within a luxury brand's studio\n", + "Proficiency in Adobe Suite, including Illustrator, InDesign, Photoshop\n", + "Excellent communication and presentation skills\n", + "Strong organizational and time management skills to meet deadlines in a fast-paced environment\n", + "Good understanding of the design process\n", + "Freelance contract possibility\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "job_offer_2 = \"\"\" \n", + "About the Job Offer\n", + "About The Role\n", + "\n", + "Innovate Design Co. is seeking a talented Visual Designer with expertise in web and interactive design to enhance our visual brand identity. This role is responsible for external digital storytelling and communications design.\n", + "\n", + "You'll be in charge of designing and developing engaging web pages, infographics, social media content, and global digital experiences that effectively convey our cutting-edge technology and products to diverse audiences.\n", + "\n", + "Responsibilities\n", + "\n", + "Translate our core values, product, marketing, and sales objectives into beautifully crafted deliverables\n", + "Design compelling, brand-aligned digital and print materials, including websites, social media content, ads, third-party marketplaces, presentations, animations, events, prints, etc.\n", + "Develop and maintain visual brand identity guidelines, ensuring brand consistency across all media and multichannel platforms\n", + "Communicate Innovate Design Co.'s narrative through conversion and data-driven design\n", + "Participate in brainstorming sessions and collaborate with stakeholders to articulate a creative vision that enhances our brand’s visual storytelling\n", + "Promote design comprehension and sensibility across the organization, refining work methodologies and design processes to enhance efficiency and effectiveness\n", + "Required Qualifications\n", + "\n", + "A Bachelor’s degree (or equivalent) in Graphic Design / Visual Arts - or a self-starter with a strong creative project track record\n", + "5-7 years of experience in Graphic Design, including brand design, 360° marketing and communications design, product brand design, 0-to-1 projects, front-end development, etc.\n", + "Work experience within well-structured design departments operating in the tech/software space (including leading creative agencies, scale-ups, and mature tech companies)\n", + "Proficiency in Figma, Adobe CC, print design best practices, and a solid understanding of web technologies (HTML, CSS, JS)\n", + "A robust portfolio demonstrating a variety of design projects, showcasing creativity, originality, consistency, and attention to detail\n", + "Perfectly fluent in English, both written and spoken\n", + "Results-oriented, resourceful, innovative, intellectually curious, no-ego, proactive\n", + "Highly collaborative and able to balance multiple projects and stakeholders\n", + "Professional behavior with personal accountability, drive, and work ethics\n", + "You may be a good fit if you\n", + "\n", + "Share our excitement for the AI/technology space\n", + "Drive projects independently, make judgments based on brand and business goals, seek feedback, collaborate cross-functionally, and leverage others' expertise\n", + "Maintain high design standards without perfectionism, understanding tradeoffs in a fast-paced environment\n", + "Understand audience feelings, cultures, and challenges through an inquisitive and learning-based approach\n", + "What We Offer\n", + "\n", + "The ability to shape the exciting journey of technology and be part of the very early days of one of Europe’s hottest startups\n", + "A fun, young, international, and multicultural team — based in Paris, London, and San Francisco\n", + "Beautiful office space in the heart of Paris (Canal St Martin)\n", + "Competitive salary and benefits package\n", + "Opportunities for professional growth and development\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel, Field\n", + "from typing import List, Optional\n", + "\n", + "class JobResponsibility(BaseModel):\n", + " description: str = Field(..., description=\"A specific responsibility in the job role\")\n", + "\n", + "class JobQualification(BaseModel):\n", + " skill: str = Field(..., description=\"A required or preferred skill for the job\")\n", + "\n", + "class JobCertification(BaseModel):\n", + " certification: str = Field(..., description=\"Required or preferred certifications for the job\")\n", + "\n", + "class JobOffer(BaseModel):\n", + " job_offer_title: str = Field(..., description=\"The job title\")\n", + " company: str = Field(..., description=\"The name of the company offering the job\")\n", + " location: str = Field(..., description=\"The job location (can specify if remote/hybrid)\")\n", + " job_type: str = Field(..., description=\"Type of job (e.g., full-time, part-time, contract)\")\n", + " responsibilities: List[JobResponsibility] = Field(..., description=\"List of key responsibilities\")\n", + " qualifications: List[JobQualification] = Field(..., description=\"List of required or preferred qualifications\")\n", + " certifications: Optional[List[JobCertification]] = Field(None, description=\"Required or preferred certifications\")\n", + " benefits: Optional[List[str]] = Field(None, description=\"List of job benefits\")\n", + " experience_required: str = Field(..., description=\"Required years of experience\")\n", + " salary_range: Optional[str] = Field(None, description=\"Salary range for the position\")\n", + " apply_url: Optional[str] = Field(None, description=\"URL to apply for the job\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "IE_query = '''\n", + "# DIRECTIVES : \n", + "- Act like an experienced information extractor. \n", + "- You have a chunk of a job offer description.\n", + "- If you do not find the right information, keep its place empty.\n", + "'''\n", + "# we have replaced the curly braces with square brackets to avoid the error in the query\n", + "distilled_Job_Offer = document_distiller.distill(documents=[job_offer], IE_query=IE_query, output_data_structure=JobOffer)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "semantic_blocks_job_offer = [f\"{key} - {value}\".replace(\"{\", \"[\").replace(\"}\", \"]\") for key, value in distilled_Job_Offer.items() if value !=[] and value != \"\" and value != None]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "distilled_Job_Offer_2 = document_distiller.distill(documents=[job_offer_2], IE_query=IE_query, output_data_structure=JobOffer)\n", + "semantic_blocks_job_offer_2 = [f\"{key} - {value}\".replace(\"{\", \"[\").replace(\"}\", \"]\") for key, value in distilled_Job_Offer_2.items() if value !=[] and value != \"\" and value != None]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## iText2KG for graph construction" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "from itext2kg import iText2KG\n", + "\n", + "\n", + "itext2kg = iText2KG(llm_model = openai_llm_model, embeddings_model = openai_embeddings_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We constructed the first graph. As you can see, we have passed all the semantic blocks of the CV to the LLM as one block. You can also pass the job offer as another semantic block, or you can separate the semantic blocks while constructing the graph.\n", + "\n", + "You just need to pay attention to how much information is contained within each block. For example, the block \"name - Emily Davis\" does not contain much information, which will result in some isolated nodes (even if we reprompt it, it won't converge). It is better to concatenate it with other blocks or pass all the semantic blocks at once as we did here :) .\n", + "\n", + "You should also ensure that you pass a substantial amount of information within each block. If there is a lot of information, you will have a well-formed graph. However, the nodes may contain merged concepts (phrases from paragraphs).\n", + "\n", + "You need to find the optimal amount of information to include (you can experiment by iterating)." + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] Extracting Entities from the Document 1\n", + "{'entities': [{'label': 'Person', 'name': 'Emily Davis'}, {'label': 'Contact Information', 'name': '+1 567 890 1234'}, {'label': 'Contact Information', 'name': 'emilydavis@example.com'}, {'label': 'Social Media', 'name': 'linkedin.com/in/emilydavis'}, {'label': 'Summary', 'name': 'Creative and passionate graphic designer with a keen eye for aesthetics and visual storytelling. Experienced in creating compelling designs for various media, including print and digital platforms.'}, {'label': 'Job Title', 'name': 'Senior Graphic Designer'}, {'label': 'Company', 'name': 'MNO Creative'}, {'label': 'Location', 'name': 'Boston, MA'}, {'label': 'Start Date', 'name': 'June 2017'}, {'label': 'End Date', 'name': 'Present'}, {'label': 'Responsibility', 'name': 'Designed logos, brochures, and social media graphics.'}, {'label': 'Responsibility', 'name': 'Collaborated with clients to understand their vision and deliver high-quality designs.'}, {'label': 'Responsibility', 'name': 'Managed multiple projects simultaneously.'}, {'label': 'Degree', 'name': 'Bachelor of Fine Arts in Graphic Design'}, {'label': 'Institution', 'name': 'Rhode Island School of Design'}, {'label': 'End Date', 'name': 'May 2017'}, {'label': 'Skill', 'name': 'Adobe Creative Suite (Photoshop, Illustrator, InDesign)'}, {'label': 'Skill', 'name': 'Sketch'}, {'label': 'Skill', 'name': 'Figma'}, {'label': 'Skill', 'name': 'Creativity'}, {'label': 'Skill', 'name': 'communication'}, {'label': 'Skill', 'name': 'time management'}, {'label': 'Certification', 'name': 'Adobe Certified Expert'}, {'label': 'Certification', 'name': 'UX Design Certification by Nielsen Norman Group'}]}\n", + "[INFO] Extracting Relations from the Document 1\n", + "{'relationships': [{'startNode': 'emily davis', 'endNode': '+1 567 890 1234', 'name': 'has phone number'}, {'startNode': 'emily davis', 'endNode': 'emilydavis@example.com', 'name': 'has email'}, {'startNode': 'emily davis', 'endNode': 'linkedin.com/in/emilydavis', 'name': 'has linkedin'}, {'startNode': 'emily davis', 'endNode': 'creative and passionate graphic designer with a keen eye for aesthetics and visual storytelling. experienced in creating compelling designs for various media, including print and digital platforms.', 'name': 'has summary'}, {'startNode': 'emily davis', 'endNode': 'senior graphic designer', 'name': 'has job title'}, {'startNode': 'senior graphic designer', 'endNode': 'mno creative', 'name': 'at company'}, {'startNode': 'mno creative', 'endNode': 'boston, ma', 'name': 'located in'}, {'startNode': 'senior graphic designer', 'endNode': 'june 2017', 'name': 'start date'}, {'startNode': 'senior graphic designer', 'endNode': 'present', 'name': 'end date'}, {'startNode': 'senior graphic designer', 'endNode': 'designed logos, brochures, and social media graphics.', 'name': 'responsibility'}, {'startNode': 'senior graphic designer', 'endNode': 'collaborated with clients to understand their vision and deliver high quality designs.', 'name': 'responsibility'}, {'startNode': 'senior graphic designer', 'endNode': 'managed multiple projects simultaneously.', 'name': 'responsibility'}, {'startNode': 'emily davis', 'endNode': 'bachelor of fine arts in graphic design', 'name': 'has degree'}, {'startNode': 'bachelor of fine arts in graphic design', 'endNode': 'rhode island school of design', 'name': 'from institution'}, {'startNode': 'bachelor of fine arts in graphic design', 'endNode': 'may 2017', 'name': 'end date'}, {'startNode': 'emily davis', 'endNode': 'adobe creative suite (photoshop, illustrator, indesign)', 'name': 'has skill'}, {'startNode': 'emily davis', 'endNode': 'sketch', 'name': 'has skill'}, {'startNode': 'emily davis', 'endNode': 'figma', 'name': 'has skill'}, {'startNode': 'emily davis', 'endNode': 'creativity', 'name': 'has skill'}, {'startNode': 'emily davis', 'endNode': 'communication', 'name': 'has skill'}, {'startNode': 'emily davis', 'endNode': 'time management', 'name': 'has skill'}, {'startNode': 'emily davis', 'endNode': 'adobe certified expert', 'name': 'has certification'}, {'startNode': 'emily davis', 'endNode': 'ux design certification by nielsen norman group', 'name': 'has certification'}]}\n" + ] + } + ], + "source": [ + "global_ent, global_rel = itext2kg.build_graph(sections=[semantic_blocks_cv], ent_threshold=0.6, rel_threshold=0.6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We construct the second graph, noting that we already have existing entities and relationships." + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] Extracting Entities from the Document 1\n", + "{'entities': [{'label': 'Job Offer Title', 'name': 'Graphic Designer'}, {'label': 'Company', 'name': 'FICTITIOUS COMPANY'}, {'label': 'Job Type', 'name': 'Freelance'}, {'label': 'Responsibility', 'name': 'Translate creative direction into high-quality silhouettes using Photoshop'}, {'label': 'Responsibility', 'name': 'Work on a wide range of projects to visualize and develop graphic designs that meet each brief'}, {'label': 'Responsibility', 'name': 'Work independently as well as in collaboration with the studio team to meet deadlines, potentially handling five or more projects simultaneously'}, {'label': 'Responsibility', 'name': 'Develop color schemes and renderings in Photoshop, categorized by themes, subjects, etc.'}, {'label': 'Qualification', 'name': 'Bachelor’s degree (Bac+3/5) in Graphic Design or Art'}, {'label': 'Qualification', 'name': \"3 years of experience in similar roles within a luxury brand's studio\"}, {'label': 'Qualification', 'name': 'Proficiency in Adobe Suite, including Illustrator, InDesign, Photoshop'}, {'label': 'Qualification', 'name': 'Excellent communication and presentation skills'}, {'label': 'Qualification', 'name': 'Strong organizational and time management skills to meet deadlines in a fast-paced environment'}, {'label': 'Qualification', 'name': 'Good understanding of the design process'}, {'label': 'Experience Required', 'name': '3 years'}]}\n", + "[INFO] Extracting Relations from the Document 1\n", + "{'relationships': [{'startNode': 'graphic designer', 'endNode': 'fictitious company', 'name': 'job_offer_title'}, {'startNode': 'graphic designer', 'endNode': 'freelance', 'name': 'job_type'}, {'startNode': 'graphic designer', 'endNode': 'translate creative direction into high quality silhouettes using photoshop', 'name': 'responsibility'}, {'startNode': 'graphic designer', 'endNode': 'work on a wide range of projects to visualize and develop graphic designs that meet each brief', 'name': 'responsibility'}, {'startNode': 'graphic designer', 'endNode': 'work independently as well as in collaboration with the studio team to meet deadlines, potentially handling five or more projects simultaneously', 'name': 'responsibility'}, {'startNode': 'graphic designer', 'endNode': 'develop color schemes and renderings in photoshop, categorized by themes, subjects, etc.', 'name': 'responsibility'}, {'startNode': 'graphic designer', 'endNode': 'bachelor’s degree (bac+3/5) in graphic design or art', 'name': 'qualification'}, {'startNode': 'graphic designer', 'endNode': \"3 years of experience in similar roles within a luxury brand's studio\", 'name': 'qualification'}, {'startNode': 'graphic designer', 'endNode': 'proficiency in adobe suite, including illustrator, indesign, photoshop', 'name': 'qualification'}, {'startNode': 'graphic designer', 'endNode': 'excellent communication and presentation skills', 'name': 'qualification'}, {'startNode': 'graphic designer', 'endNode': 'strong organizational and time management skills to meet deadlines in a fast paced environment', 'name': 'qualification'}, {'startNode': 'graphic designer', 'endNode': 'good understanding of the design process', 'name': 'qualification'}, {'startNode': 'graphic designer', 'endNode': '3 years', 'name': 'experience_required'}]}\n", + "[INFO] Matching the Document 1 Entities and Relationships with the Existing Global Entities/Relations\n", + "[INFO] Wohoo ! Entity using embeddings is matched --- graphic designer -merged--> senior graphic designer \n", + "[INFO] Wohoo ! Entity using embeddings is matched --- bachelor’s degree (bac+3/5) in graphic design or art -merged--> bachelor of fine arts in graphic design \n", + "[INFO] Wohoo ! Entity using embeddings is matched --- proficiency in adobe suite, including illustrator, indesign, photoshop -merged--> adobe creative suite (photoshop, illustrator, indesign) \n", + "[INFO] Wohoo ! Relation using embeddings is matched --- job offer title -merged--> has_job_title \n" + ] + } + ], + "source": [ + "global_ent_, global_rel_ = itext2kg.build_graph(sections=[semantic_blocks_job_offer], existing_global_entities = global_ent, existing_global_relationships = global_rel, ent_threshold=0.6, rel_threshold=0.6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Same :)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] Extracting Entities from the Document 1\n", + "{'entities': [{'label': 'Job Offer Title', 'name': 'Visual Designer'}, {'label': 'Company', 'name': 'Innovate Design Co.'}, {'label': 'Location', 'name': 'Paris'}, {'label': 'Location', 'name': 'London'}, {'label': 'Location', 'name': 'San Francisco'}, {'label': 'Responsibility', 'name': 'Translate our core values, product, marketing, and sales objectives into beautifully crafted deliverables'}, {'label': 'Responsibility', 'name': 'Design compelling, brand-aligned digital and print materials, including websites, social media content, ads, third-party marketplaces, presentations, animations, events, prints, etc.'}, {'label': 'Responsibility', 'name': 'Develop and maintain visual brand identity guidelines, ensuring brand consistency across all media and multichannel platforms'}, {'label': 'Responsibility', 'name': \"Communicate Innovate Design Co.'s narrative through conversion and data-driven design\"}, {'label': 'Responsibility', 'name': 'Participate in brainstorming sessions and collaborate with stakeholders to articulate a creative vision that enhances our brand’s visual storytelling'}, {'label': 'Responsibility', 'name': 'Promote design comprehension and sensibility across the organization, refining work methodologies and design processes to enhance efficiency and effectiveness'}, {'label': 'Qualification', 'name': 'A Bachelor’s degree (or equivalent) in Graphic Design / Visual Arts - or a self-starter with a strong creative project track record'}, {'label': 'Qualification', 'name': '5-7 years of experience in Graphic Design, including brand design, 360° marketing and communications design, product brand design, 0-to-1 projects, front-end development, etc.'}, {'label': 'Qualification', 'name': 'Work experience within well-structured design departments operating in the tech/software space (including leading creative agencies, scale-ups, and mature tech companies)'}, {'label': 'Qualification', 'name': 'Proficiency in Figma, Adobe CC, print design best practices, and a solid understanding of web technologies (HTML, CSS, JS)'}, {'label': 'Qualification', 'name': 'A robust portfolio demonstrating a variety of design projects, showcasing creativity, originality, consistency, and attention to detail'}, {'label': 'Qualification', 'name': 'Perfectly fluent in English, both written and spoken'}, {'label': 'Qualification', 'name': 'Results-oriented, resourceful, innovative, intellectually curious, no-ego, proactive'}, {'label': 'Qualification', 'name': 'Highly collaborative and able to balance multiple projects and stakeholders'}, {'label': 'Qualification', 'name': 'Professional behavior with personal accountability, drive, and work ethics'}, {'label': 'Benefit', 'name': 'The ability to shape the exciting journey of technology and be part of the very early days of one of Europe’s hottest startups'}, {'label': 'Benefit', 'name': 'A fun, young, international, and multicultural team — based in Paris, London, and San Francisco'}, {'label': 'Benefit', 'name': 'Beautiful office space in the heart of Paris (Canal St Martin)'}, {'label': 'Benefit', 'name': 'Competitive salary and benefits package'}, {'label': 'Benefit', 'name': 'Opportunities for professional growth and development'}, {'label': 'Experience Required', 'name': '5-7 years'}]}\n", + "[INFO] Extracting Relations from the Document 1\n", + "{'relationships': [{'startNode': 'visual designer', 'endNode': 'innovate design co.', 'name': 'job_offer_title'}, {'startNode': 'innovate design co.', 'endNode': 'paris', 'name': 'location'}, {'startNode': 'innovate design co.', 'endNode': 'london', 'name': 'location'}, {'startNode': 'innovate design co.', 'endNode': 'san francisco', 'name': 'location'}, {'startNode': 'visual designer', 'endNode': 'translate our core values, product, marketing, and sales objectives into beautifully crafted deliverables', 'name': 'responsibility'}, {'startNode': 'visual designer', 'endNode': 'design compelling, brand aligned digital and print materials, including websites, social media content, ads, third party marketplaces, presentations, animations, events, prints, etc.', 'name': 'responsibility'}, {'startNode': 'visual designer', 'endNode': 'develop and maintain visual brand identity guidelines, ensuring brand consistency across all media and multichannel platforms', 'name': 'responsibility'}, {'startNode': 'visual designer', 'endNode': \"communicate innovate design co.'s narrative through conversion and data driven design\", 'name': 'responsibility'}, {'startNode': 'visual designer', 'endNode': 'participate in brainstorming sessions and collaborate with stakeholders to articulate a creative vision that enhances our brand’s visual storytelling', 'name': 'responsibility'}, {'startNode': 'visual designer', 'endNode': 'promote design comprehension and sensibility across the organization, refining work methodologies and design processes to enhance efficiency and effectiveness', 'name': 'responsibility'}, {'startNode': 'visual designer', 'endNode': 'a bachelor’s degree (or equivalent) in graphic design / visual arts or a self starter with a strong creative project track record', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': '5 7 years of experience in graphic design, including brand design, 360° marketing and communications design, product brand design, 0 to 1 projects, front end development, etc.', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'work experience within well structured design departments operating in the tech/software space (including leading creative agencies, scale ups, and mature tech companies)', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'proficiency in figma, adobe cc, print design best practices, and a solid understanding of web technologies (html, css, js)', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'a robust portfolio demonstrating a variety of design projects, showcasing creativity, originality, consistency, and attention to detail', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'perfectly fluent in english, both written and spoken', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'results oriented, resourceful, innovative, intellectually curious, no ego, proactive', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'highly collaborative and able to balance multiple projects and stakeholders', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'professional behavior with personal accountability, drive, and work ethics', 'name': 'qualification'}, {'startNode': 'visual designer', 'endNode': 'the ability to shape the exciting journey of technology and be part of the very early days of one of europe’s hottest startups', 'name': 'benefit'}, {'startNode': 'visual designer', 'endNode': 'a fun, young, international, and multicultural team — based in paris, london, and san francisco', 'name': 'benefit'}, {'startNode': 'visual designer', 'endNode': 'beautiful office space in the heart of paris (canal st martin)', 'name': 'benefit'}, {'startNode': 'visual designer', 'endNode': 'competitive salary and benefits package', 'name': 'benefit'}, {'startNode': 'visual designer', 'endNode': 'opportunities for professional growth and development', 'name': 'benefit'}, {'startNode': 'visual designer', 'endNode': '5 7 years', 'name': 'experience_required'}]}\n", + "[INFO] Matching the Document 1 Entities and Relationships with the Existing Global Entities/Relations\n", + "[INFO] Wohoo ! Entity using embeddings is matched --- design compelling, brand aligned digital and print materials, including websites, social media content, ads, third party marketplaces, presentations, animations, events, prints, etc. -merged--> designed logos, brochures, and social media graphics. \n", + "[INFO] Wohoo ! Entity using embeddings is matched --- a bachelor’s degree (or equivalent) in graphic design / visual arts or a self starter with a strong creative project track record -merged--> bachelor of fine arts in graphic design \n", + "[INFO] Wohoo ! Entity using embeddings is matched --- highly collaborative and able to balance multiple projects and stakeholders -merged--> managed multiple projects simultaneously. \n", + "[INFO] Wohoo ! Entity using embeddings is matched --- 5 7 years -merged--> 3 years \n", + "[INFO] Wohoo ! Relation using embeddings is matched --- job offer title -merged--> has_job_title \n", + "[INFO] Wohoo ! Relation using embeddings is matched --- location -merged--> located_in \n", + "[INFO] Wohoo ! Relation using embeddings is matched --- location -merged--> located_in \n", + "[INFO] Wohoo ! Relation using embeddings is matched --- location -merged--> located_in \n", + "[INFO] Wohoo ! Relation using embeddings is matched --- experience required -merged--> experience_required \n" + ] + } + ], + "source": [ + "global_ent__, global_rel__ = itext2kg.build_graph(sections=[semantic_blocks_job_offer_2], existing_global_entities = global_ent_, existing_global_relationships = global_rel_, ent_threshold=0.6, rel_threshold=0.6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Draw the graph\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final section involves visualizing the constructed knowledge graph using GraphIntegrator. The graph database Neo4j is accessed using specified credentials, and the resulting graph is visualized to provide a visual representation of the relationships and entities extracted from the document." + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "from itext2kg.graph_integration import GraphIntegrator\n", + "\n", + "\n", + "URI = \"bolt://localhost:7687\"\n", + "USERNAME = \"neo4j\"\n", + "PASSWORD = \"allah0505\"\n", + "\n", + "\n", + "new_graph = {}\n", + "new_graph[\"nodes\"] = global_ent__\n", + "new_graph[\"relationships\"] = global_rel__\n", + "\n", + "GraphIntegrator(uri=URI, username=USERNAME, password=PASSWORD).visualize_graph(json_graph=new_graph)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}