diff --git a/README.md b/README.md index 53cc5408..ed216c73 100644 --- a/README.md +++ b/README.md @@ -59,3 +59,19 @@ Unit tests are written in [pytest](https://docs.pytest.org/en/7.3.x/) and execut > Kemp, C., Xu, Y., & Regier, T. (2018). Semantic Typology and Efficient Communication. Annual Review of Linguistics, 4(1), 109–128. https://doi.org/10.1146/annurev-linguistics-011817-045406 + +## Citation + +``` +@article{imel2025ultk, + author = {Imel, Nathaniel and Haberland, Claire and Steinert-Threlkeld, Shane}, + title = {The Unnatural Language ToolKit (ULTK)}, + journal = {Proceedings of the Society for Computation in Linguistics}, + volume = {8}, + number = {1}, + pages = {46}, + year = {2025}, + doi = {10.7275/scil.3144}, + url = {https://doi.org/10.7275/scil.3144} +} +``` diff --git a/src/examples/__init__.py b/src/examples/__init__.py index 1e064995..588142a2 100644 --- a/src/examples/__init__.py +++ b/src/examples/__init__.py @@ -1,3 +1 @@ -"""Minimal examples demonstrating how to use ULTK. - -""" +"""Minimal examples demonstrating how to use ULTK.""" diff --git a/src/examples/modals/scripts/analyze.py b/src/examples/modals/scripts/analyze.py index d65aa2c5..dc3d25b6 100644 --- a/src/examples/modals/scripts/analyze.py +++ b/src/examples/modals/scripts/analyze.py @@ -30,11 +30,10 @@ pn.aes( fill="degree_iff", # shape="type", - ), + ), color="black", size=6, ) - + pn.geom_point( # The natural languages natural_data, color="red", @@ -48,10 +47,10 @@ size=6, # orig 9 nudge_x=1, # color="white", - ) + ) + pn.scale_fill_continuous( "cividis", - name="naturalness", + name="naturalness", ) + pn.theme_classic() + pn.xlab("Complexity") @@ -66,6 +65,7 @@ import numpy as np from scipy.stats import ttest_1samp, linregress + print( ttest_1samp( explored_data["distance"].values, @@ -79,4 +79,4 @@ explored_data["degree_iff"].values, ) ) - # breakpoint() \ No newline at end of file + # breakpoint() diff --git a/src/examples/modals/scripts/combine_data.py b/src/examples/modals/scripts/combine_data.py index db31d3c9..ccd3ce3a 100644 --- a/src/examples/modals/scripts/combine_data.py +++ b/src/examples/modals/scripts/combine_data.py @@ -31,8 +31,11 @@ def yaml_to_dataframe(filename: str, keys: list[str]) -> pd.DataFrame: ) from ultk.effcomm.tradeoff import pareto_min_distances + all_points = all_data[["complexity", "comm_cost"]].values - pareto_points = all_data[all_data["type"] == "dominant"][["complexity", "comm_cost"]].values + pareto_points = all_data[all_data["type"] == "dominant"][ + ["complexity", "comm_cost"] + ].values min_distances = pareto_min_distances(points=all_points, pareto_points=pareto_points) all_data["distance"] = min_distances diff --git a/src/ultk/effcomm/__init__.py b/src/ultk/effcomm/__init__.py index e2d2d86c..086bbed2 100644 --- a/src/ultk/effcomm/__init__.py +++ b/src/ultk/effcomm/__init__.py @@ -1,4 +1,4 @@ -"""Tools for measuring languages for communicative efficiency. +"""Tools for measuring languages for communicative efficiency. Submodules divide the labor of a computational experiment performing an efficiency analysis of a language into several parts: generating and sampling the space of possible languages, measuring their properties, and determining which languages optimize efficient trade-offs w.r.t these properties. diff --git a/src/ultk/effcomm/sampling.py b/src/ultk/effcomm/sampling.py index f246660b..86ea7fc6 100644 --- a/src/ultk/effcomm/sampling.py +++ b/src/ultk/effcomm/sampling.py @@ -1,5 +1,4 @@ -"""Functions for sampling expressions into languages. -""" +"""Functions for sampling expressions into languages.""" import copy from typing import Any diff --git a/src/ultk/language/__init__.py b/src/ultk/language/__init__.py index b2adaf9a..1cbfd2ab 100644 --- a/src/ultk/language/__init__.py +++ b/src/ultk/language/__init__.py @@ -2,7 +2,7 @@ At the current stage of development, ULTK focuses on supporting abstractions to model the mapping between expressions and meanings of a language. So far, we leave almost everything besides this basic mapping (morphosyntax, phonology, phonetic inventories, among other features of human languages) to future work. -The `ultk.language.language` submodule contains classes for constructing a language, which can contain one or more expressions. +The `ultk.language.language` submodule contains classes for constructing a language, which can contain one or more expressions. The `ultk.language.semantics` submodule contains classes for defining a universe (meaning space) of referents (denotations) and meanings (categories). """ diff --git a/src/ultk/language/semantics.py b/src/ultk/language/semantics.py index fa22384c..b87aa214 100644 --- a/src/ultk/language/semantics.py +++ b/src/ultk/language/semantics.py @@ -1,20 +1,20 @@ """Classes for modeling the meanings of a language. - Meanings are modeled as things which map linguistic forms to objects of reference. The linguistic forms and objects of reference can in principle be very detailed, and future work may elaborate the meaning classes and implement a Form class. +Meanings are modeled as things which map linguistic forms to objects of reference. The linguistic forms and objects of reference can in principle be very detailed, and future work may elaborate the meaning classes and implement a Form class. - In efficient communication analyses, simplicity and informativeness can be measured as properties of semantic aspects of a language. E.g., a meaning is simple if it is easy to represent, or to compress into some code; a meaning is informative if it is easy for a listener to recover a speaker's intended literal meaning. +In efficient communication analyses, simplicity and informativeness can be measured as properties of semantic aspects of a language. E.g., a meaning is simple if it is easy to represent, or to compress into some code; a meaning is informative if it is easy for a listener to recover a speaker's intended literal meaning. - Examples: +Examples: - >>> from ultk.language.semantics import Referent, Meaning, Universe - >>> from ultk.language.language import Expression - >>> # construct the meaning space for numerals - >>> numerals_universe = NumeralUniverse(referents=[NumeralReferent(str(i)) for i in range(1, 100)]) - >>> # construct a list of referents for the expression 'a few' - >>> a_few_refs = [NumeralReferent(name=str(i)) for i in range(2, 6)] - >>> a_few_meaning = NumeralMeaning(referents=a_few_refs, universe=numerals_universe) - >>> # define the expression - >>> a_few = NumeralExpression(form="a few", meaning=a_few_meaning) + >>> from ultk.language.semantics import Referent, Meaning, Universe + >>> from ultk.language.language import Expression + >>> # construct the meaning space for numerals + >>> numerals_universe = NumeralUniverse(referents=[NumeralReferent(str(i)) for i in range(1, 100)]) + >>> # construct a list of referents for the expression 'a few' + >>> a_few_refs = [NumeralReferent(name=str(i)) for i in range(2, 6)] + >>> a_few_meaning = NumeralMeaning(referents=a_few_refs, universe=numerals_universe) + >>> # define the expression + >>> a_few = NumeralExpression(form="a few", meaning=a_few_meaning) """ from dataclasses import dataclass