/modules/uoa-graph-portal/trunk/src/app/about/about.component.html - Annotate - D-Net - D-Net project tracking tool

dnet40/modules/uoa-graph-portal/trunk/src/app/about/about.component.html @ 60442

-            k.triantaf
+<schema2jsonld [URL]="properties.domain"
                [logoURL]="properties.domain + '/assets/common-assets/logo-small-graph.png'"
                [description]="description"
                type="other"
                [name]="title">
 </schema2jsonld>
-            konstantin
+<div class="about">
-            k.triantaf
+  <div class="uk-section">
-            konstantin
+    <div class="uk-margin-large-left uk-margin-medium-bottom">
       <breadcrumbs [breadcrumbs]="breadcrumbs"></breadcrumbs>
     </div>
-            k.triantaf
+    <div class="firstBackground">
       <div class="uk-container">
         <h2 class="uk-text-center">About</h2>
         <div class="uk-flex uk-flex-center">
           <div class="uk-padding-small uk-width-4-5@m">
             <p>
               Open Science is gradually becoming the modus operandi in research practices, affecting the way researchers
               collaborate and publish, discover, and access scientific knowledge.
               Scientists are increasingly publishing research results beyond the article, to share all scientific
               products (metadata and files) generated during an experiment, such as datasets, software, experiments.
               They publish in scholarly communication data sources (e.g. institutional repositories, data archives,
               software repositories), rely where possible on persistent identifiers (e.g. DOI, ORCID, Grid.ac, PDBs),
               specify semantic links to other research products (e.g. supplementedBy, citedBy, versionOf), and possibly
               to projects and/or relative funders.
               By following such practices, scientists are implicitly constructing the Global Open Science Graph, where
               by "graph" we mean a collection of objects interlinked by semantic relationships.
               <br><br>
               The OpenAIRE Research Graph includes metadata and links between scientific products (e.g. literature,
               datasets, software, and "other research products"), organizations, funders, funding streams, projects,
               communities, and (provenance) data sources - the details of the <a
                 href="https://zenodo.org/record/2643199#.XOqdstMzZ24" target="_blank">graph data model</a> can be found
               in Zenodo.org.
               <br><br>
               The Graph is available and obtained as an aggregation of the metadata and links collected from ~70.000
               trusted sources, further enriched with metadata and links provided by:</p>
             <ul class="portal-circle">
               <li class="uk-margin-bottom">OpenAIRE end-users, e.g. researchers, project administrators, data curators
                 providing links from scientific products to projects, funders, communities, or other products;
               </li>
               <li class="uk-margin-bottom">OpenAIRE Full-text mining algorithms over around ~10Mi Open Access article
                 full-texts;
               </li>
               <li>Research infrastructure scholarly services, bridged to the graph via OpenAIRE, exposing metadata of
                 products such as research workflows, experiments, research objects, software, etc.
               </li>
             </ul>
           </div>
-            konstantin
+        </div>
       </div>
     </div>
-            konstantin
+  </div>
-            k.triantaf
+  <div id="architecture" class="uk-container uk-section">
     <div class="uk-padding-small">
       <h2 class="uk-text-center">Architecture</h2>
       <div class="uk-flex uk-flex-center">
         <div class="uk-width-4-5@m">
           <h3 class="uk-margin-medium-top portal-color">How we build it</h3>
           <div>
             <p>
               OpenAIRE collects metadata records from more than 70K scholarly communication sources from all over the
               world, including Open Access institutional repositories, data archives, journals.
               All the metadata records (i.e. descriptions of research products) are put together in a data lake,
               together
               with records from Crossref, Unpaywall, ORCID, Grid.ac, and information about projects provided by national
               and international funders.
               Dedicated inference algorithms applied to metadata and to the full-texts of Open Access publications
               enrich
               the content of the data lake with links between research results and projects, author affiliations,
               subject
               classification, links to entries from domain-specific databases.
               Duplicated organisations and results are identified and merged together to obtain an open, trusted, public
               resource enabling explorations of the scholarly communication landscape like never before.
             </p>
             konstantin
-            k.triantaf
+          </div>
-            konstantin
+        </div>
       </div>
-            k.triantaf
+      <div class="uk-flex uk-flex-center uk-inline uk-margin-medium-top">
         <!-- Architecture diagram: the file name comes from architectureImage, which the hover handlers on the action points below swap. -->
         <img [src]="'assets/graph-assets/about/architecture/'+architectureImage"
              class="uk-width-4-5 architecture-image" alt="Architecture">
             konstantin
-            k.triantaf
+        <a class="uk-position-absolute uk-transform-center uk-padding" style="left: 27%; top: 48%"
-            k.triantaf
+           (click)="goTo('tabs_card'); changeTab(0)"
            (mouseenter)="architectureImage = 'aggregation_hover.png'" (mouseleave)="architectureImage = 'gray.png'">
-            k.triantaf
+          <action-point [class.uk-invisible]="architectureImage == 'aggregation_hover.png'"></action-point>
-            k.triantaf
+        </a>
-            k.triantaf
+        <a class="uk-position-absolute uk-transform-center uk-padding" style="left: 47%; top: 48%"
-            k.triantaf
+           (click)="goTo('tabs_card'); changeTab(1)"
            (mouseenter)="architectureImage = 'deduplication_hover.png'" (mouseleave)="architectureImage = 'gray.png'">
-            k.triantaf
+          <action-point [class.uk-invisible]="architectureImage == 'deduplication_hover.png'"></action-point>
-            k.triantaf
+        </a>
-            k.triantaf
+        <a class="uk-position-absolute uk-transform-center uk-padding" style="left: 58%; top: 48%"
-            k.triantaf
+           (click)="goTo('tabs_card'); changeTab(2)"
            (mouseenter)="architectureImage = 'enrichment_hover.png'" (mouseleave)="architectureImage = 'gray.png'">
-            k.triantaf
+          <action-point [class.uk-invisible]="architectureImage == 'enrichment_hover.png'"></action-point>
-            k.triantaf
+        </a>
-            k.triantaf
+        <a class="uk-position-absolute uk-transform-center uk-padding" style="left: 70%; top: 48%"
-            k.triantaf
+           (click)="goTo('tabs_card'); changeTab(3)"
            (mouseenter)="architectureImage = 'post_cleaning_hover.png'" (mouseleave)="architectureImage = 'gray.png'">
-            k.triantaf
+          <action-point [class.uk-invisible]="architectureImage == 'post_cleaning_hover.png'"></action-point>
-            k.triantaf
+        </a>
-            k.triantaf
+        <a class="uk-position-absolute uk-transform-center uk-padding" style="left: 76%; top: 32%"
-            k.triantaf
+           (click)="goTo('tabs_card'); changeTab(4)"
            (mouseenter)="architectureImage = 'indexing_hover.png'" (mouseleave)="architectureImage = 'gray.png'">
-            k.triantaf
+          <action-point [class.uk-invisible]="architectureImage == 'indexing_hover.png'"></action-point>
-            k.triantaf
+        </a>
-            k.triantaf
+        <a class="uk-position-absolute uk-transform-center uk-padding" style="left: 76%; top: 72%"
-            k.triantaf
+           (click)="goTo('tabs_card'); changeTab(5)"
            (mouseenter)="architectureImage = 'stats_analysis_hover.png'" (mouseleave)="architectureImage = 'gray.png'">
-            k.triantaf
+          <action-point [class.uk-invisible]="architectureImage == 'stats_analysis_hover.png'"></action-point>
-            k.triantaf
+        </a>
       </div>
       <div id="tabs_card"
-            k.triantaf
+           class="uk-margin-xlarge-top uk-padding-small">
         <div class="uk-card uk-card-default uk-card-body architecture-card">
           <ul #tabs uk-tab class="uk-tab">
             <li><a>Aggregation</a></li>
             <li><a>Deduplication</a></li>
             <li><a>Enrichment</a></li>
             <li><a>Post-Cleaning</a></li>
             <li><a>Indexing</a></li>
             <li><a>Stats Analysis</a></li>
           </ul>
             konstantin
-            k.triantaf
+          <ul class="uk-switcher uk-margin">
             <li>
               <!--            uk-grid-->
               <div class=" uk-margin-large-top uk-text-small">
                 <!--              <div class="uk-width-3-5@m">-->
                 <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                      src="assets/graph-assets/about/architecture/aggregation.png" alt="Aggregation">
                 <div
                     [class]="'uk-margin-bottom uk-margin-medium-right '+(aggregationReadMore ? '' : 'lines-18 multi-line-ellipsis')">
                   <div>
                     OpenAIRE collects metadata records from a variety of content providers as described in
                     <a href="https://www.openaire.eu/aggregation-and-content-provision-workflows" target="_blank">https://www.openaire.eu/aggregation-and-content-provision-workflows</a>.
                     <br><br>
                     OpenAIRE aggregates metadata records describing objects of the research life-cycle from content
                     providers compliant to the
                     <a href="https://guidelines.openaire.eu" target="_blank">OpenAIRE guidelines</a>
                     and from entity registries (i.e. data sources offering authoritative lists of entities, like OpenDOAR,
                     re3data, DOAJ, and funder databases).
                     After collection, metadata are transformed according to the OpenAIRE internal metadata model, which is
                     used to generate the final OpenAIRE Research Graph that you can access from the OpenAIRE portal and
                     the
                     APIs.
                     <br><br>
                     The transformation process includes the application of cleaning functions whose goal is to ensure that
                     values are harmonised according to a common format (e.g. dates as YYYY-MM-dd) and, whenever
                     applicable,
                     to a common controlled vocabulary.
                     The controlled vocabularies used for cleansing are accessible at
                     <a href="http://api.openaire.eu/vocabularies" target="_blank">http://api.openaire.eu/vocabularies</a>.
                     Each vocabulary features a set of controlled terms, each with one code, one label, and a set of
                     synonyms.
                     If a synonym is found as field value, the value is updated with the corresponding term.
                     Also, the OpenAIRE Research Graph is extended with other relevant scholarly communication sources that
                     are too big to be integrated via the “normal” aggregation mechanism: DOIBoost (which merges Crossref,
                     ORCID, Microsoft Academic Graph, and Unpaywall), and ScholeXplorer, one of the Scholix hubs offering a
                     large set of links between research literature and data.
                   </div>
-            konstantin
+                </div>
-            k.triantaf
+                <div *ngIf="!aggregationReadMore" class="uk-width-3-5@m uk-text-center clickable"
                      (click)="aggregationReadMore = true">
                   <a class="custom-explore-toggle">Read more<span uk-icon="chevron-down"></span></a>
                 </div>
                 <div *ngIf="aggregationReadMore" class="uk-width-3-5@m uk-text-center clickable"
                      (click)="aggregationReadMore = false">
                   <a class="custom-explore-toggle">Read less<span uk-icon="chevron-up"></span></a>
                 </div>
                 <!--              </div>-->
                 <!--              <div class="uk-width-expand">-->
                 <!--                <img src="assets/graph-assets/about/architecture/aggregation.png">-->
                 <!--              </div>-->
-            konstantin
+              </div>
-            k.triantaf
+            </li>
             <li>
               <div class="uk-grid">
                 <!--              <div class="uk-width-3-5@m">-->
                 <div class="uk-margin-bottom uk-margin-medium-right uk-text-small">
                   <ul class="uk-subnav button-tab" uk-switcher>
                     <li><a>Clustering</a></li>
                     <li><a>Matching & Election</a></li>
                   </ul>
             konstantin
-            k.triantaf
+                  <ul class="uk-switcher uk-margin align-list">
                     <li>
                       <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                            src="assets/graph-assets/about/architecture/deduplication.svg" alt="Deduplication">
                       <div
                           [class]="'uk-margin-bottom uk-margin-medium-right uk-text-small '+(dedupClusteringReadMore ? '' : 'lines-18 multi-line-ellipsis')">
-            konstantin
+                        <div>
-            k.triantaf
+                          <div>
                             Clustering is a common heuristic used to overcome the N x N complexity required to match all
                             pairs of objects to identify the equivalent ones.
                             The challenge is to identify a clustering function that maximizes the chance of comparing only
                             records that may lead to a match, while minimizing the number of records that will not be
                             matched while being equivalent.
                             Since the equivalence function is to some level tolerant to minimal errors (e.g. switching of
                             characters in the title, or minimal difference in letters), we need this function to be not
                             too
                             precise (e.g. a hash of the title), but also not too flexible (e.g. random ngrams of the
                             title).
                             On the other hand, reality tells us that in some cases equality of two records can only be
                             determined by their PIDs (e.g. DOI) as the metadata properties are very different across
                             different versions and no clustering function will ever bring them into the same cluster.
                             To match these requirements OpenAIRE clustering for products works with two functions:
                           </div>
                           <ul class="portal-circle">
                             <li>
                               <div>DOI: the function generates the DOI when this is provided as part of the record
                                 properties;
                               </div>
                             </li>
                             <li>
                               <div>
                                 Title-based function: the function generates a key that depends on
                                 (i) number of significant words in the title (normalized, stemming, etc.),
                                 (ii) modulo 10 of the number of characters of such words, and
                                 (iii) a string obtained as an alternation of the function prefix(3) and suffix(3) (and
                                 vice
                                 versa) of the first 3 words (2 words if the title only has 2). For example, the title
                                 “Entity
                                 deduplication in big data graphs for scholarly communication” becomes “entity
                                 deduplication
                                 big data graphs scholarly communication” with two keys, “7.1entionbig” and
                                 “7.1itydedbig”
                                 (where 1 is modulo 10 of 54 characters of the normalized title).
                               </div>
                             </li>
                           </ul>
                           <div>
                             To give an idea, this configuration generates around 77Mi blocks, which we limited to 200
                             records each (only 15K blocks are affected by the cut), and entails 260Bi matches. Matches in
+                            a
                             block are performed using a “sliding window” set to 80 records. The records are sorted
                             lexicographically on a normalized version of their titles. The 1st record is matched against
                             all
                             the 80 following ones, then the second, etc. for an NlogN complexity.
                           </div>
-            konstantin
+                        </div>
-            k.triantaf
+                      </div>
                       <div *ngIf="!dedupClusteringReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="dedupClusteringReadMore = true">
                         <a class="custom-explore-toggle">Read more<span uk-icon="chevron-down"></span></a>
                       </div>
                       <div *ngIf="dedupClusteringReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="dedupClusteringReadMore = false">
                         <a class="custom-explore-toggle">Read less<span uk-icon="chevron-up"></span></a>
                       </div>
                     </li>
                     <li>
                       <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                            src="assets/graph-assets/about/architecture/deduplication.svg" alt="Deduplication">
                       <div
                           [class]="'uk-margin-bottom uk-margin-medium-right uk-text-small '+(dedupMatchingAndElectionReadMore ? '' : 'lines-18 multi-line-ellipsis')">
                         <div>
                           <div>
                             Once the clusters have been built, the algorithm proceeds with the comparisons.
                             Comparisons are driven by a decisional tree that:
                           </div>
                           <ul class="uk-list">
                             <li class="uk-margin-small-bottom">
                               <div>
                                 <span class="portal-color">1.</span> Tries to capture equivalence via PIDs: if records
                                 share
                                 a PID then they are equivalent
                               </div>
                             </li>
                             <li class="uk-margin-small-bottom">
                               <div>
                                 <span class="portal-color">2.</span> Tries to capture difference:
                               </div>
                               <ul class="uk-list">
                                 <li class="uk-margin-small-bottom">
                                   <div>
                                     <span class="portal-color">a.</span>
                                     If record titles contain different “numbers” then they are different (this rule is
                                     subject to different feelings, and should be fine-tuned);
                                   </div>
                                 </li>
                                 <li class="uk-margin-small-bottom">
                                   <div>
                                     <span class="portal-color">b.</span>
                                     If records contain a different number of authors then they are different;
                                   </div>
                                 </li>
                                 <li class="uk-margin-small-bottom">
                                   <div>
                                     <span class="portal-color">c.</span>
                                     Note that different PIDs do not imply different records, as different versions may
                                     have
                                     different PIDs.
                                   </div>
                                 </li>
                               </ul>
                             </li>
                             <li>
                               <div><span class="portal-color">3.</span> Measures equivalence:</div>
                               <ul class="uk-list portal-circle">
                                 <li>
                                   <div>
                                     The titles of the two records are normalised and compared for similarity by applying
                                     the
                                     Levenshtein distance algorithm.
                                     The algorithm returns a number in the range [0,1], where 0 means “very different” and
                                     1 means “equal”.
                                     If the distance is greater than or equal to 0.99 the two records are identified as
                                     duplicates.
                                   </div>
                                 </li>
                                 <li>
                                   <div>Dates are not regarded for equivalence matching because different versions of the
                                     same records should be merged and may be published on different dates, e.g. pre-print
                                     and published version of an article.
                                   </div>
                                 </li>
                               </ul>
                             </li>
                           </ul>
                           <div>
                             Once the equivalence relationships between pairs of records are set, the groups of equivalent
                             records are obtained (transitive closure, i.e. “mesh”).
                             From such sets a new representative object is obtained, which inherits all properties from the
                             merged records and keeps track of their provenance.
                             The ID of the record is obtained by appending the prefix “dedup_” to the MD5 of the first ID
                             (given their lexicographical ordering).
                             A new, more stable function to generate the ID is under development, which exploits the DOI
                             when
                             one of the records to be merged includes a Crossref or a DataCite record.
                           </div>
                         </div>
                       </div>
                       <div *ngIf="!dedupMatchingAndElectionReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="dedupMatchingAndElectionReadMore = true">
                         <a class="custom-explore-toggle">Read more<span uk-icon="chevron-down"></span></a>
                       </div>
                       <div *ngIf="dedupMatchingAndElectionReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="dedupMatchingAndElectionReadMore = false">
                         <a class="custom-explore-toggle">Read less<span uk-icon="chevron-up"></span></a>
                       </div>
                     </li>
                   </ul>
                 </div>
                 <!--              </div>-->
                 <!--              <div class="uk-width-expand">-->
                 <!--                <img src="assets/graph-assets/about/architecture/deduplication.svg">-->
                 <!--              </div>-->
               </div>
             </li>
             <li>
               <div class="uk-grid">
                 <!--              <div class="uk-width-3-5@m">-->
                 <div class="uk-margin-bottom uk-margin-medium-right uk-text-small">
                   <ul class="uk-subnav button-tab uk-grid uk-grid-small" uk-switcher>
                     <li><a>General</a></li>
                     <li><a>Mining</a></li>
                     <li><a>Bulk tagging/ Deduction</a></li>
                     <li><a>Propagation</a></li>
                   </ul>
             konstantin
-            k.triantaf
+                  <ul class="uk-switcher uk-margin">
                     <li>
                       <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                            src="assets/graph-assets/about/architecture/enrichment.svg" alt="Enrichment">
                       <div class="uk-margin-bottom uk-margin-medium-right uk-text-small">
                         <p>
                           The aggregation processes are continuously running and apply vocabularies as they are in a given
                           moment of time.
                           It could be the case that a vocabulary changes after the aggregation of one data source has
                           finished,
                           thus the aggregated content does not reflect the current status of the controlled vocabularies.
                           <br><br>
                           In addition, the integration of ScholeXplorer and DOIBoost and some enrichment processes
                           applied
                           on the raw
                           and on the de-duplicated graph may introduce values that do not comply with the current status
                           of
                           the OpenAIRE controlled vocabularies.
                           For these reasons, we included a final step of cleansing at the end of the workflow
                           materialisation.
                           The output of the final cleansing step is the final version of the OpenAIRE Research Graph.
                         </p>
                       </div>
                     </li>
                     <li>
                       <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                            src="assets/graph-assets/about/architecture/enrichment.svg" alt="Enrichment">
                       <div
                           [class]="'uk-margin-bottom uk-margin-medium-right uk-text-small '+(enrichmentMiningReadMore ? '' : 'lines-18 multi-line-ellipsis')">
-            konstantin
+                        <div>
-            k.triantaf
+                          <div>
                             The OpenAIRE Research Graph is enriched by links mined by OpenAIRE’s full-text mining
                             algorithms
                             that scan the plaintexts of publications for funding information, references to datasets,
                             software URIs, accession numbers of bio-entities, and EPO patent mentions.
                             Custom mining modules also link research objects to specific research communities, initiatives
                             and infrastructures.
                             In addition, other inference modules provide content-based document classification, document
                             similarity, citation matching, and author affiliation matching.
                             <br><br>
                             <span class="portal-color">Project mining</span>
                             in OpenAIRE text mines the full-texts of publications in order to extract matches to funding
                             project codes/IDs.
                             The mining algorithm works by utilising
                             (i) the grant identifier, and
                             (ii) the project acronym (if available) of each project.
                             The mining algorithm:
                             (1) Preprocesses/normalizes the full-texts using several functions, which depend on the
                             characteristics of each funder (i.e., the format of the grant identifiers), such as stopword
                             and/or punctuation removal, tokenization, stemming, converting to lowercase; then
                             (2) String matching of grant identifiers against the normalized text is done using database
                             techniques; and
                             (3) The results are validated and cleaned using the context near the match by looking at the
                             context around the matched ID for relevant metadata and positive or negative words/phrases, in
                             order to calculate a confidence value for each publication-->project link.
                             A confidence threshold is set to optimise high accuracy while minimising false positives, such
                             as matches with page or report numbers, post/zip codes, parts of telephone numbers, DOIs or
                             URLs, accession numbers.
                             The algorithm also applies rules for disambiguating results, as different funders can share
                             identical project IDs; for example, grant number 633172 could refer to H2020 project EuroMix
                             but
                             also to Australian-funded NHMRC project “Brain activity (EEG) analysis and brain imaging
                             techniques to measure the neurobiological effects of sleep apnea”.
                             Project mining works very well and was the first Text & Data Mining (TDM) service of OpenAIRE.
                             Performance results vary from funder to funder but precision is higher than 98% for all
                             funders
                             and 99.5% for EC projects.
                             Recall is higher than 95% (99% for EC projects), when projects are properly acknowledged using
                             project/grant IDs.
                             <br><br>
                             <span class="portal-color">Dataset extraction</span>
                             runs on publications full-texts as described in “High pass text-filtering for Citation
                             matching”, TPDL 2017[1].
                             In particular, we search for citations to datasets using their DOIs, titles and other metadata
                             (i.e., dates, creator names, publishers, etc.).
                             We extract parts of the text which look like citations and search for datasets using database
                             join and pattern matching techniques.
                             Based on the experiments described in the paper, precision of the dataset extraction module is
                             98.5% and recall is 97.4% but it is also probably overestimated since it does not take into
                             account corruptions that may take place during pdf to text extraction.
                             It is calculated on the extracted full-texts of small samples from PubMed and arXiv.
                             <br><br>
                             <span class="portal-color">Software extraction</span>
                             runs also on parts of the text which look like citations.
                             We search the citations for links to software in open software repositories, specifically
                             GitHub, SourceForge, Bitbucket and the Google Code Archive.
                             After that, we search for links that are included in Software Heritage (SH,
                             https://www.softwareheritage.org) and return the permanent URL that SH provides for each
                             software project.
                             We also enrich this content with user names, titles and descriptions of the software projects
                             using web mining techniques.
                             Since software mining is based on URL matching, our precision is 100% (we return a software
                             link
                             only if we find it in the text and there is no need to disambiguate).
                             As for recall rate, this is not calculable for this mining task.
                             Although we apply all the necessary normalizations to the URLs in order to overcome usual
                             issues
                             (e.g., http or https, existence of www or not, lower/upper case), we do not calculate cases
                             where a software is mentioned using its name and not by a link from the supported software
                             repositories.
                             <br><br>
                             <span class="portal-color">For the extraction of bio-entities</span>, we focus on Protein Data
                             Bank (PDB) entries.
                             We have downloaded the database with PDB codes and we update it regularly.
                             We search through the whole publication’s full-text for references to PDB codes.
                             We apply disambiguation rules (e.g., there are PDB codes that are the same as antibody codes
                             or
                             other issues) so that we return valid results.
                             Current precision is 98%.
                             Although it's risky to mention recall rates since these are usually overestimated, we have
                             calculated a recall rate of 98% using small samples from PubMed publications.
                             Moreover, our technique is able to identify about 30% more links to proteins than the ones
                             that
                             are tagged in PubMed XMLs.
                             <br><br>
                             <span class="portal-color">Other text-mining modules</span> include mining for links to EPO
                             patents, or custom mining modules for linking research objects to specific research
                             communities,
                             initiatives and infrastructures, e.g. COVID-19 mining module.
                             Apart from text-mining modules, OpenAIRE also provides a document classification service that
                             employs analysis of free text stemming from the abstracts of the publications.
                             The purpose of applying a document classification module is to assign a scientific text one or
                             more predefined content classes.
                             In OpenAIRE, the currently used taxonomies are arXiv, MeSH (Medical Subject Headings), ACM and
                             DDC (Dewey Decimal Classification, or Dewey Decimal System).
                             <br><br>
                             <hr>
                             [1] Foufoulas, Y., Stamatogiannakis, L., Dimitropoulos, H., & Ioannidis, Y. (2017, September).
                             High-Pass Text Filtering for Citation Matching.
                             In International Conference on Theory and Practice of Digital Libraries (pp. 355-366).
                             Springer,
                             Cham.
                           </div>
-            konstantin
+                        </div>
                       </div>
-            k.triantaf
+                      <div *ngIf="!enrichmentMiningReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="enrichmentMiningReadMore = true">
                         <a class="custom-explore-toggle">Read more<span uk-icon="chevron-down"></span></a>
                       </div>
                       <div *ngIf="enrichmentMiningReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="enrichmentMiningReadMore = false">
                         <a class="custom-explore-toggle">Read less<span uk-icon="chevron-up"></span></a>
                       </div>
                     </li>
                     <li>
                       <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                            src="assets/graph-assets/about/architecture/enrichment.svg" alt="Enrichment">
                       <div class="uk-margin-bottom uk-margin-medium-right uk-text-small">
                         The Deduction process (also known as “bulk tagging”) enriches each record with new information
                         that
                         can be derived from the existing property values.
                         <br><br>
                         As of September 2020, three procedures are in place to relate a research product to a research
                         initiative, infrastructure (RI) or community (RC) based on:
                         <ul class="portal-circle">
                           <li>subjects (2.7M results tagged)</li>
                           <li>Zenodo community (16K results tagged)</li>
                           <li>the data source it comes from (250K results tagged)</li>
                         </ul>
                         The list of subjects, Zenodo communities and data sources used to enrich the products are defined
                         by
                         the managers of the community gateway or infrastructure monitoring dashboard associated with the
                         RC/RI.
                       </div>
                     </li>
                     <li>
                       <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                            src="assets/graph-assets/about/architecture/enrichment.svg" alt="Enrichment">
                       <div
                           [class]="'uk-margin-bottom uk-margin-medium-right uk-text-small '+(enrichmentPropagationReadMore ? '' : 'lines-18 multi-line-ellipsis')">
-            konstantin
+                        <div>
-            k.triantaf
+                          <div>
                             This process “propagates” properties and links from one product to another if between the two
                             there is a “strong” semantic relationship.
                             <br><br>
                             As of September 2020, the following procedures are in place:
                             <ul class="portal-circle">
                               <li>
                                 Propagation of the property “country” to results from institutional repositories:
                                 e.g. publication collected from an institutional repository maintained by an Italian
                                 university will be enriched with the property “country = IT”.
-            konstantin
+                              </li>
-            k.triantaf
+                              <li>
                                 Propagation of links to projects: e.g. publication linked to project P “is supplemented
                                 by”
                                 a dataset D.
                                 Dataset D will get the link to project P.
                                 The relationships considered for this procedure are “isSupplementedBy” and “supplements”.
-            konstantin
+                              </li>
-            k.triantaf
+                              <li>
                                 Propagation of related community/infrastructure/initiative from organizations to products
                                 via affiliation relationships: e.g. a publication with an author affiliated with
                                 organization O.
                                 The manager of the community gateway C declared that the outputs of O are all relevant for
                                 his/her community C.
                                 The publication is tagged as relevant for C.
-            konstantin
+                              </li>
                               <li>
-            k.triantaf
+                                Propagation of related community/infrastructure/initiative to related products: e.g.
                                 publication associated to community C is supplemented by a dataset D.
                                 Dataset D will get the association to C.
                                 The relationships considered for this procedure are “isSupplementedBy” and “supplements”.
-            konstantin
+                              </li>
                               <li>
-            k.triantaf
+                                Propagation of ORCID identifiers to related products, if the products have the same
                                 authors:
                                 e.g. publication has ORCID for its authors and is supplemented by a dataset D. Dataset D
                                 has
                                 the same authors as the publication. Authors of D are enriched with the ORCIDs available
                                 in
                                 the publication.
                                 The relationships considered for this procedure are “isSupplementedBy” and “supplements”.
-            konstantin
+                              </li>
                             </ul>
-            k.triantaf
+                          </div>
-            konstantin
+                        </div>
                       </div>
-            k.triantaf
+                      <div *ngIf="!enrichmentPropagationReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="enrichmentPropagationReadMore = true">
                         <a class="custom-explore-toggle">Read more<span uk-icon="chevron-down"></span></a>
-            konstantin
+                      </div>
-            k.triantaf
+                      <div *ngIf="enrichmentPropagationReadMore" class="uk-width-3-5@m uk-text-center clickable"
                            (click)="enrichmentPropagationReadMore = false">
                         <a class="custom-explore-toggle">Read less<span uk-icon="chevron-up"></span></a>
-            konstantin
+                      </div>
-            k.triantaf
+                    </li>
                   </ul>
                 </div>
                 <!--              </div>-->
                 <!--              <div class="uk-width-expand">-->
                 <!--                <img src="assets/graph-assets/about/architecture/enrichment.svg">-->
                 <!--              </div>-->
-            konstantin
+              </div>
-            k.triantaf
+            </li>
             <li>
               <div class="uk-text-small uk-margin-large-top">
                 <!--              <div class="uk-width-3-5@m">-->
                 <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                      src="assets/graph-assets/about/architecture/post_cleaning.svg" alt="Post Cleaning">
                 <div class="uk-margin-bottom uk-margin-medium-right">
                   <p>
-            konstantin
+                    The aggregation processes are continuously running and apply vocabularies as they are in a given moment of time.
                     It could be the case that a vocabulary changes after the aggregation of one data source has finished, thus the aggregated content does not reflect the current status of the controlled vocabularies.
                     <br><br>
                     In addition, the integration of ScholeXplorer and DOIBoost and some enrichment processes applied on the raw and on the de-duplicated graph may introduce values that do not comply with the current status of the OpenAIRE controlled vocabularies.
                     For these reasons, we included a final step of cleansing at the end of the workflow materialisation.
                     The output of the final cleansing step is the final version of the OpenAIRE Research Graph.
-            k.triantaf
+                  </p>
                 </div>
                 <!--              </div>-->
                 <!--              <div class="uk-width-expand">-->
                 <!--                <img src="assets/graph-assets/about/architecture/post_cleaning.svg">-->
                 <!--              </div>-->
-            k.triantaf
+              </div>
-            k.triantaf
+            </li>
             <li>
               <div class="uk-text-small uk-margin-large-top">
                 <!--              <div class="uk-width-3-5@m">-->
                 <img class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image"
                      src="assets/graph-assets/about/architecture/indexing.svg" alt="Indexing">
                 <div class="uk-margin-bottom uk-margin-medium-right">
                   <p>
                     The final version of the OpenAIRE Research Graph is indexed on a Solr server that is used by the
                     OpenAIRE portals (EXPLORE, CONNECT, PROVIDE) and APIs, the latter adopted by several third-party
                     applications and organizations, such as:
                   </p>
                   <ul class="portal-circle">
                     <li class="uk-margin-small-bottom">
                       <span class="portal-color">EOSC</span>
                       --The OpenAIRE Research Graph APIs and Portals will offer to the EOSC an Open Science Resource
                       Catalogue, keeping an up to date map of all research results (publications, datasets, software),
                       services, organizations, projects, funders in Europe and beyond.
                     </li>
                     <li class="uk-margin-small-bottom">
                       <span class="portal-color">DSpace & EPrints</span>
                       repositories can install the OpenAIRE plugin to expose OpenAIRE compliant metadata records via their
                       OAI-PMH endpoint and offer to researchers the possibility to link their depositions to the funding
                       project, by selecting it from the list of projects provided by OpenAIRE.
                     </li>
                     <li>
                       <span class="portal-color">EC participant portal (Sygma - System for Grant Management)</span>
                       uses the OpenAIRE API in the “Continuous Reporting” section.
                       Sygma automatically fetches from the OpenAIRE Search API the list of publications and datasets in
                       the
                       OpenAIRE Research Graph that are linked to the project.
                       The user can select the research products from the list and easily compile the continuous reporting
                       data of the project.
                     </li>
                   </ul>
                 </div>
                 <!--              </div>-->
                 <!--              <div class="uk-width-expand">-->
                 <!--                <img src="assets/graph-assets/about/architecture/indexing.svg">-->
                 <!--              </div>-->
-            k.triantaf
+              </div>
-            k.triantaf
+            </li>
             <li>
               <div class="uk-text-small uk-margin-large-top">
                 <!--              <div class="uk-width-3-5@m">-->
                 <img
                     class="uk-width-2-5@m uk-align-right@m uk-margin-remove-adjacent tab-image uk-padding-large uk-padding-remove-top uk-padding-remove-horizontal"
                     src="assets/graph-assets/about/architecture/stats_analysis.svg" alt="Stats Analysis">
                 <div class="uk-margin-bottom uk-margin-medium-right">
                   <p>
                     The OpenAIRE Research Graph is also processed by a pipeline for extracting the statistics and
                     producing
                     the charts for funders, research initiatives, infrastructures, and policy makers that you can see on
                     MONITOR.
                     Based on the information available on the graph, OpenAIRE provides a set of indicators for monitoring
                     the funding and research impact and the uptake of Open Science publishing practices,
                     such as Open Access publishing of publications and datasets, availability of interlinks between
                     research
                     products, availability of post-print versions in institutional or thematic Open Access repositories,
                     etc.
                   </p>
                 </div>
                 <!--              </div>-->
                 <!--              <div class="uk-width-expand">-->
                 <!--                <img src="assets/graph-assets/about/architecture/stats_analysis.svg">-->
                 <!--              </div>-->
-            k.triantaf
+              </div>
-            k.triantaf
+            </li>
           </ul>
         </div>
-            k.triantaf
+      </div>
-            k.triantaf
+      <div class="uk-padding-small uk-margin-top">
-            k.triantaf
+        <h6>References</h6>
         <ul class="uk-text-small portal-circle">
           <li>
             <a href="https://aka.ms/msracad" target="_blank">Microsoft Academic Graph</a>
             which is made available under the ODC Attribution License.
             For more information on Microsoft Academic Graph please also read
             <a href="https://docs.microsoft.com/en-us/academic-services/graph/resources-faq" target="_blank">here</a>.
           </li>
           <li>
             <a href="https://www.openaire.eu/aggregation-and-content-provision-workflows" target="_blank">https://www.openaire.eu/aggregation-and-content-provision-workflows</a>
           </li>
         </ul>
       </div>
-            konstantin
+    </div>
   </div>
   <div id="metrics" class="uk-container uk-container-large uk-section">
-            k.triantaf
+    <div class="uk-padding-small">
-            konstantin
+      <h2 class="uk-text-center">Data & Metrics</h2>
-            konstantin
+      <h4 class="uk-text-center uk-margin-medium-top portal-color">Coming soon...</h4>
-            k.triantaf
+      <!--        <div>-->
       <!--          <h3 class="uk-margin-medium-top portal-color">Data</h3>-->
       <!--          <div></div>-->
       <!--        </div>-->
       <!--        <div>-->
       <!--          <h3 class="uk-margin-medium-top portal-color">Metrics</h3>-->
       <!--          <div></div>-->
       <!--        </div>-->
-            konstantin
+    </div>
-            konstantin
+  </div>
-            konstantin
+  <div id="infrastructure" class="uk-container uk-section">
-            k.triantaf
+    <div class="uk-padding-small">
       <h2 class="uk-text-center">Infrastructure</h2>
-            k.triantaf
+      <div>
-            konstantin
+        <div class="uk-flex uk-flex-center uk-grid uk-grid-stack">
 <!--          <div>-->
             <p class="uk-width-4-5@m uk-padding-small">
-            argiro.kok
+              The OpenAIRE Research Graph is operated and maintained at the <a
               href="https://icm.edu.pl/en/centre-of-technology/" target="_blank">ICM cutting-edge Technology centre</a>
               with the facilities and staff guaranteeing robust operation of the whole system.
-            konstantin
+              Okeanos SuperComputer hosting the graph consists of 26016 cores in total providing 1082 Tflops/s.
               The whole setup is energy efficient, with a power efficiency of 1.554 Gflops/Watt, resulting in 160th place on the "Top500 by energy-efficiency" list (as of 2019).
             </p>
-            k.triantaf
+            <img class="infrastructure-image uk-margin-top uk-margin-bottom" src="assets/graph-assets/about/infrastructure.png">
-            konstantin
+            <p class="uk-width-4-5@m uk-padding-small">
               ICM supports the continuous operation of the infrastructure including data aggregation, deduplication, inference and provision ensuring seamless 24/7 system uptime and availability.
               System administration activities cover hardware maintenance and provisioning of the new computational resources, providing High Availability solutions to address resilience to failures by service-level redundancy and Load Balancing to distribute workloads uniformly across servers.
               The most crucial parts of the persisted graph are covered with backups along with well defined restore procedures.
               All the monitoring activities rely on aggregated system-level monitoring, accessible via various dashboards that give a better overview of system stability and of potential requirements for extending system elements.
               System-level monitoring is supplemented with availability monitoring of all the publicly accessible endpoints.
               Hence, the public API that OpenAIRE offers to third parties is of a high standard.
             </p>
             <p class="uk-width-4-5@m uk-padding-small">
               All the maintenance operations undertaken by experienced system administrators are founded on well established routines and emergency maintenance procedures.
             </p>
 <!--            The OpenAIRE graph operates based on a vast variety of hardware and software. As of December 2019, the-->
 <!--            hardware infrastructure is the following:-->
 <!--          </p>-->
 <!--          </div>-->
-            k.triantaf
+        </div>
-            konstantin
+      </div>
     </div>
-            konstantin
+  </div>
   <div id="team" class="uk-container uk-container-large uk-section">
-            k.triantaf
+    <div class="uk-padding-small">
       <h2 class="uk-text-center">Team</h2>
       <div>
         <div class="uk-margin-bottom">
-            konstantin
+<!--          <div class="uk-flex uk-flex-middle uk-grid" uk-grid="">-->
 <!--            <div class="uk-text-center uk-width-1-1@s uk-width-1-3@m uk-first-column">-->
 <!--              <img src="assets/graph-assets/about/team.svg">-->
 <!--            </div>-->
-            konstantin
+          <img class="uk-align-center uk-align-left@m uk-margin-remove-adjacent"
                src="assets/graph-assets/about/team.svg" alt="Team">
           <div class="uk-text-center uk-width-1-2@m uk-align-center uk-margin-remove-adjacent">
             <div class="uk-margin-medium-bottom">
               Key team members contributing to the Research Graph
-            k.triantaf
+            </div>
-            konstantin
+            <div>
               <a class="uk-button portal-button" target="_blank" href="https://www.openaire.eu/research-graph-team">
                 Meet the team
-            k.triantaf
+                <icon name="arrow_right" ratio="0.8" class="space"></icon>
-            konstantin
+              </a>
             </div>
-            konstantin
+          </div>
-            konstantin
+<!--          </div>-->
-            konstantin
+        </div>
       </div>
     </div>
-            konstantin
+  </div>
-            k.triantaf
+</div>

Project

General

Profile

D-Net

dnet40/modules/uoa-graph-portal/trunk/src/app/about/about.component.html @ 60442