-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathbook.tex
More file actions
916 lines (625 loc) · 94.3 KB
/
book.tex
File metadata and controls
916 lines (625 loc) · 94.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
% Possible use http://www.latextemplates.com/template/the-legrand-orange-book as template? See https://www.overleaf.com/9174958nyjxxdxbchks#/33024595/
\documentclass[11pt,letterpaper,fleqn]{memoir} % Default font size and left-justified equations
\input{formatting} % Loads the book formatting
\usepackage{tikz,pgfplots}
\usepackage[destlabel]{hyperref}
\newif\ifdraft
\drafttrue
%\draftfalse
\makeatletter
\@addtoreset{chapter}{part}
\makeatother
\newcommand{\bookversion}{3.1}
\newcommand{\bookdate}{December 31, 20??}
% Adding ability to include pdfs (used to import book cover)
\usepackage{pdfpages}
% Adding pdf index and links within the document
% Custom formatting to remove default red boxes
\usepackage{hyperref}
\hypersetup{
colorlinks=true,
urlcolor=blue,
linkcolor=blue
}
\urlstyle{same}
% ---------------------------
\usetikzlibrary{positioning,calc,arrows,arrows.meta, shapes,decorations.markings}
\begin{document}
% Set theory (expected) \in, \subset, injective/surjective, cartesian product
% Closure of a set under an operation
% Need examples of topology and sigma algebra
% Note that sigma algebra are also closed under intersections
% TODO: language change. Possibilities are experimentally DEFINED cases. Experimentally distinguishable cases give Hausdorff.
\frontmatter
\thispagestyle{empty} % Suppress headers and footers on the title page
\includepdf{BookCover.pdf}
\newpage
\thispagestyle{empty}
~
\newpage
~
\thispagestyle{empty}
\vspace{20pt}
{\large \noindent Gabriele Carcassi, Christine A. Aidala }
\vspace{60pt}
{\Huge \noindent \textbf{Assumptions of physics}}
\vspace{30pt}
\ifdraft {\large \noindent Working DRAFT for Ver \bookversion ~- \today}
\else {\large \noindent Ver \bookversion ~- \bookdate}
\fi
\vfill
%\chapter*{Assumptions of physics}
\ifdraft
\noindent \textbf{This book is a work in progress}. This draft is a development copy built on \today. It is provided as-is for the purpose of early review and feedback. You can get the latest draft from \url{https://assumptionsofphysics.org/book}.
\else
\noindent This edition was finalized on \bookdate. Older and newer versions can be found at \url{https://assumptionsofphysics.org/book}.
\fi
\newpage
~\vfill
\thispagestyle{empty}
% Copyright notice
\noindent Copyright \copyright\ 2018-26 Gabriele Carcassi, Christine A. Aidala
\vspace{12pt}
% Link to website
\noindent \textsc{assumptionsofphysics.org/book}
\vspace{12pt}
% License
\noindent Licensed under the Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{https://creativecommons.org/licenses/by-nc-sa/4.0/}. This work is distributed under the License ``as is'', without warranties or conditions of any kind, either express or implied.
\chapter{Preface}
This work is part of a larger research program, Assumptions of Physics (\url{https://assumptionsofphysics.org}), that aims to identify a handful of physical principles from which the basic laws can be rigorously derived. The goal is to give physics (and science more in general) a renewed foundation that is mathematically precise, physically meaningful and philosophically consistent. Given the ambition and broad scope of the task, nothing would ever be written if one were to wait for the complete picture. Therefore this work contains only the parts of the project that are considered to be mature, and so it will be revised and expanded as progress is made. We give here a brief overview of the project, which can be useful to better understand the context of this work.
\subsection{Overall goals of Assumptions of Physics}
What do the basic laws of physics describe? Why is the state of a classical particle identified by position and momentum (i.e.~a point on a symplectic manifold) while the state of a quantum system is identified by a wave function (i.e.~a vector in a Hilbert space)? What assumptions are unique to each theory? What are, instead, the basic requirements that all physical theories must satisfy? Could we have had different laws? A lack of clear answers to these questions is, we believe, the biggest obstacle in the foundations of physics and prevents the resolution of outstanding problems in the field. \textbf{Our approach is to find a minimal set of physical assumptions that are necessary and sufficient to derive the different theories within a unified framework.} If we are able to do so, then we are guaranteed that all that the laws of physics say is encoded in those assumptions and we are able to answer those questions.
We found this approach to be very fruitful. It provides new insights into physics as a whole, the role of mathematics in physical theories and gives a more solid conceptual foundation to both. It becomes clear why some mathematical structures are pervasive in science and what exactly they are meant to represent, while others will never play a role. The downside is that we have to touch many subjects in math (logic, topological spaces, measure theory, group theory, vector spaces, differential geometry, symplectic geometry, statistics, probability theory, ...), physics (Hamiltonian mechanics, Lagrangian mechanics, thermodynamics, quantum mechanics, electromagnetism, ...) and science in general (computer science theory, information theory, system theory, ...). In other words, \textbf{the only way to properly achieve the goal is to rebuild everything from the ground up:} formal rigor, physical significance and conceptual integrity are not something that can be added at the end, but they must be present from the beginning.
The main takeaway for us is that the foundations of science are one: no real progress can be made on the foundations of one subject without making progress on the foundations of others. \textbf{What is needed is a general theory of experimental science: a theory that studies physical theories.} This provides a standard framework that defines basic concepts and requirements (e.g.~experimental verifiability, granularity of descriptions, processes and states) that serve as a common basis for all theories. Each theory, then, is recovered by studying how these common objects become specialized under different assumptions. This book aims to build over time, piece by piece, this framework.
While the topic is necessarily inter-disciplinary, \textbf{this is still first and foremost a scientific book}. The material should be accessible to the mathematician and philosopher, but understand that it needs to resonate first and foremost with the experimental physicist and the engineer. The mathematical definitions and derivations are there to make the science precise, but they are not the main focus. In fact, the book is designed so that the mathematical definitions and proofs, highlighted with a green side bar, can be skipped altogether without loss of the big picture and the important details. Along the same line, the foundational discussions are there to articulate more precisely what it means to do science, so they will not indulge in other questions which may be of interest to the philosopher but not to the scientist.
\subsection{A living work}
For the project to be successful, we need to depart from some of the norms of academic research and academic publishing. For example, one typically develops their research program as a series of articles published in a peer-reviewed journal that caters to a specific community. These articles are then typically collected as is or merged into a book. This does not work for this project. As journals are specialized into sometimes very narrow fields, this would create a set of disjoint articles that cater to different audiences, with no guarantee that they can fit into a unified vision. For us, the overall picture, if and how the different perspectives combine, is the most important feature. \textbf{In this sense, the book comes first and the articles are derivative works.} We need to pool expertise and ideas from a wide range of disciplines and make sure that the result makes sense from all angles.
As the goal is broad, the framework needs to evolve as new issues are solved and old ones are better understood. If one part changes, we have to make sure that everything is updated to keep conceptual consistency. \textbf{This book, therefore, is an ongoing project.} It will continue to grow organically, adding and revising chapters. As is standard practice in open source/free software communities, we need to ``release early, release often'' to gather feedback. Each new version supersedes all prior ones and will be superseded by future ones. There is therefore no ``definitive version'' in the near future as we don't expect to ``solve all of physics'' in the near future. However, the framework will tend to converge as different parts become more settled.
The upshot is that one only needs to read the latest version of this work to be current. That is, one does not need to read a scattered set of papers, which require previous knowledge of a field, and follow how the ideas have changed. Just get the latest copy of the book, and if you do find areas you can help us expand or improve, let us know!
\section{Project overview}
Here we present a summary of the whole project and the status of each part as the layout will map to the structure of this work. We divide the work based on the two main techniques we use. The first, \textbf{reverse physics}, aims to identify the fundamental ideas and assumptions by reverse engineering them from the current physical theories. The second, \textbf{physical mathematics}, aims to construct a rigorous mathematical framework from the ground up, based on the ideas and assumptions found by reverse physics.
\subsection{Reverse physics}
Reverse physics looks at the main physical theories, like classical mechanics, thermodynamics and quantum mechanics, to identify concepts that can be used to fully explain the common and different aspects of those theories. The core insight of reverse physics is that \textbf{when the math is derived from the right physical assumptions, the physical assumptions can be derived from the math}. That is, once one finds meaningful assumptions that can be shown to be equivalent to the physical laws, one can either start from the laws or the assumptions.
The standard of rigor in this part is necessarily more relaxed as we do not have a guarantee that sufficiently mature mathematical tools exist to carry out the argument in a precise fashion. For example, we have found that the idea of a unit system is linked in a fundamental way to the notion of state spaces, yet we lack a fully developed mathematical framework to model units and their dependency. The goal is to test the ideas conceptually, find those that are broad enough and necessary enough to then justify investing further time in a more rigorous approach.
The following are examples of the type of assumptions we have found to be good starting points to rederive the different theories.
\begin{description}
\item[Determinism and reversibility:] ``The system undergoes deterministic and reversible evolution.'' Mathematically, the physical properties of the system determine which category, in the mathematical sense, is used to describe the state space, and deterministic and reversible evolution will be an isomorphism in the category (i.e.~a bijective map that preserves the physical properties of the system). Therefore the law of evolution is not just a bijective map, but is also a linear transformation, a differentiable map or an isometry depending on the context.
\item[Infinitesimal reducibility:] ``Specifying the state of the whole system is equivalent to specifying the state of all its infinitesimal parts.'' For example, we can study the motion of a ball, but we can also mark a spot in red and study the motion of the mark. Knowing the evolution of the whole ball means knowing the evolution of any arbitrary spot and vice-versa. Mathematically, the state of the whole will be a distribution over the state space of the parts. It will need to be a distribution whose value is invariant under coordinate transformations. The state space of the infinitesimal parts, then, comes equipped with an invariant two-form upon which we can define such a distribution. The state space is therefore a symplectic manifold, that is,
the states of the infinitesimal parts are described by pairs of conjugate variables, which recovers phase space. If the previous assumption holds, deterministic and reversible evolution is a symplectomorphism, that is, deterministic and reversible evolution follows classical Hamiltonian mechanics. Proper handling of the time variable will give us a relativistic version of the framework without extra assumptions.
\item[Irreducibility:] ``Specifying the state of the whole system tells us nothing about its infinitesimal parts.'' For example, we can study the state of an electron by scattering photons off of it. But whenever a photon interacts with the electron, it interacts with the whole electron. There is no way to mark a part of an electron and study it independently from the rest. Mathematically, the state of the electron will be a distribution that evolves deterministically where the motion of each infinitesimal part cannot be further described.
\item[Kinematic equivalence:] ``Specifying the motion of the system is equivalent to specifying its state and evolution.'' This means that we will have to be able to re-express a distribution over kinematic variables (i.e.~position and velocity) into a distribution over state variables (i.e.~position and momentum) and vice-versa. Mathematically, the symplectic two-form will induce a symmetric tensor over the tangent space for position. This will give us a metric and will also allow us to reformulate the laws of motion according to Lagrangian mechanics. Because the transformation is linear, we are able to constrain the Hamiltonian to the one for massive particles under scalar and vector potential forces.
\end{description}
\subsection{Physical mathematics}
Physical mathematics aims to develop mathematical structures that are based on physical principles and assumptions, so that a perfect mapping exists between physical objects and their mathematical representation. The core insight of physical mathematics is that \textbf{when physical objects are mapped to the right mathematical objects, the physical requirements map to the mathematical definitions}. That is, the only way to have a perfect map between physical objects and their mathematical representation is if the mathematical axioms and definitions can be justified by physical requirements.
As of now, we identified the following two core principles that serve as guidance to the development of the basic mathematical structures. As they describe requirements that any physical theory must satisfy, they are suitable to act as the foundation of a theory of scientific theories.
\begin{description}
\item[Principle of scientific objectivity.] ``Science is universal, non-contradictory and evidence based.'' This tells us that a scientific theory must be characterized by statements that are connected to experimental verification. Therefore, verifiable statements and their logic must provide a common foundation to all physical theories. Mathematically, these requirements are captured by topologies and $\sigma$-algebras over the space of the possible cases that can be identified experimentally. This part is very well developed both conceptually and mathematically.
\item[Principle of scientific reproducibility.] ``Scientific laws describe relationships that can always be experimentally reproduced.'' This tells us that physical laws are relationships between inputs and outputs of repeatable procedures. Therefore, statistical ensembles provide a common foundation to define states and processes to all physical theories. Mathematically, a space of ensembles must be a topological space that allows convex combinations (i.e.~statistical mixing) equipped with an entropy function that characterizes the variability of the elements within the ensemble. This part is still being developed, and will include open problems and conjectures.
\end{description}
\section*{Current plan and status}
In this version we have added a physical mathematics chapter dedicated to ensemble spaces. For reverse physics, we plan to work on quantum mechanics. For physical mathematics, we plan to continue the work on ensemble spaces.
\section*{Changelog}
\begin{description}
\item 2025/12/30: Ver 3.0 - Added chapter on ensemble spaces. Minor updates and corrections to old chapters.
\item 2023/10/01: Ver 2.0 - Divided the work into two main parts: Reverse Physics and Physical Mathematics. Added chapter on reversing classical mechanics. Minor updates on the logic section.
\item 2021/03/08: Ver 1.0 - Updated the first three chapters with minor changes: renamed tautology to certainty and contradiction to impossibility as they characterize better their role in the framework; made more formal justifications for the basic axioms and some of the basic definitions; causal relationships are now proved to be continuous instead of assumed to be continuous. Added Part II to include the results that are not yet fully formalized, to give a sense of the future scope of the work.
\item 2019/07/07: Ver 0.3 - Reviewed first two chapters to clarify the idea of possible assignments and how contexts for function spaces are constructed.
\item 2019/02/22: Ver 0.2 - Consolidated third chapter on properties, quantities and ordering.
\item 2018/06/22: Ver 0.1 - Consolidated first two chapters that lay the foundation for the general theory.
\end{description}
\cleardoublepage % Forces the first chapter to start on an odd page so it's on the right
\tableofcontents* % Print the table of contents itself
\cleardoublepage % Forces the first chapter to start on an odd page so it's on the right
%\pagestyle{fancy} % Print headers again
\mainmatter
\part{Reverse Physics}
\begingroup
\pagestyle{plain}
\include{rp_Intro}
\newpage
\endgroup
\include{rp_ClassicalMechanics}
\part{Physical Mathematics}
\begingroup
\pagestyle{plain}
\textbf{Physical mathematics} is an approach to the mathematical foundations of physics that seeks to construct mathematical structures strictly from axioms and definitions that can be rigorously justified from physical requirements, instead of simply taking tools developed within mathematics and applying them to physics or physics-inspired problems. Physical mathematics is based on the insight that \textbf{when physical objects are mapped to the right mathematical objects, the physical requirements map to the mathematical definitions}.
If our goal is to fully rederive physical theories from physical assumptions, we need to have a precise mapping between physical objects and mathematical ones. Understanding the axioms and definitions of the mathematical tools used in a physical theory, then, is not just ``mathematical detail'' of no concern to the physicist, but rather the precise stipulation of properties that certain physical objects must have under suitable, possibly simplifying, assumptions. In this sense, there is no ``correct'' structure in a mathematical sense, because the correct structure is the one suited to the physical problem at hand.
It should be clear that mathematicians are generally ill-equipped to determine whether mathematical structures are physically significant. As David Hilbert stated, ``Mathematics is a game played according to certain simple rules with meaningless marks on paper.'' Regarding mathematical axioms, Bertrand Russell claimed, ``It is essential not to discuss whether the first proposition is really true, and not to mention what the anything is, of which it is supposed to be true.'' Mathematics knows the rules of everything but the meaning of nothing. It is therefore unreasonable to expect that the foundations of mathematics, by themselves, can provide any foundation for physics.
In the same way that elaborate correct mathematical theories stem from minimal correct mathematical theories (not elaborate incorrect mathematical theories); that large living creatures grow from small living creatures (not large dead creatures); sophisticated physically meaningful theories come from simple physically meaningful theories (and not from sophisticated meaningless ones). Meaningfulness, like correctness or aliveness, is not something that can be imposed after the fact. Therefore the only way to develop physically meaningful mathematical structures is to develop them from scratch: we cannot simply take higher level mathematical objects and ``sprinkle meaning'', an interpretation, on top.
The goal of physical mathematics, then, is to find how to turn physical assumptions into precise mathematical requirements, such that we are guaranteed to know what exactly each mathematical object represents and under which physical conditions.
\subsection{A new standard for scientific rigor}
From the above discussion, it follows that the standard of rigor mathematicians have developed for their field is not sufficient for the purpose of physical mathematics. Mathematics only deals with formal systems, whose starting points are a set of definitions and rules that are taken as is. At that point, correctness of the premise cannot be established, only self-consistency. Therefore mathematics fails to deal with the most delicate and interesting parts of the foundations of physics: the physical assumptions and how they are encoded into the formal framework. We therefore need rules and standards for rigorously handling the informal parts of the framework and, since there are no guidelines for this, we set our own standard.
We call an \textbf{axiom} a proposition that brings new objects or new properties of established objects within the formal framework. A \textbf{definition}, instead, is a proposition that further characterizes objects and properties already present in the formalism. \textbf{An axiom or a definition is well posed only when it is clear what the objects represent physically and what aspects are captured mathematically.} Therefore each axiom and definition is composed of two parts. The first characterizes the objects and properties within the informal system, tells us what they represent physically. The second part, typically preceded by ``Formally'', characterizes the part that is captured by the formal system. Axioms and definitions are followed by a \textbf{justification} when it is necessary to explain why the elements in the informal system must be mapped into the formal system in the way proposed. Some definitions are purely formal and as such do not require justifications. As this argument spans both the formal and informal systems, this cannot be a mathematical proof in the modern sense. In particular, the justification for an axiom must argue why those objects must exist.
The above standard makes sure we have a perfect identification between formal and informal objects. All mathematical symbols correspond to physical objects and all the relevant physical concepts are captured by the math. All subsequent propositions and proofs, then, can be carried out in the formal system, where it is easier to check for consistency and correctness. However, all the proofs can, if needed, be translated into the informal language and given physical meaning.
\newpage
\endgroup
\include{pm_VerifiableStatements}
\include{pm_CombinedDomains}
\include{pm_PropertiesQuantities}
\include{pm_EnsembleSpaces}
\part{Blueprints for the work ahead}
\begingroup
\pagestyle{plain}
\include{blueprint-intro}
\newpage
\endgroup
\chapter{Reverse Physics}
\section{Classical mechanics}
The work on classical mechanics is considered mostly concluded, in the sense that suitable initial assumptions have been identified. There are still a few open issues, such as the case of variable mass, the generalization of the directional DOF to the relativistic case, or clarifying the nature or the generalization to infinite DOFs (i.e.~field theories).
\subsection{Curvature for particle dynamics}
The assumption of kinematic equivalence already gives us relativistic Hamiltonians. Does it also give us a relationship between the curvature of the metric tensor and the forces acting on the particles?
The setup is the following. Suppose we have two vectors in the extended phase space $d\xi^a = \{dq^\alpha, 0\}$ and $d\nu^a = \{0, dp_\alpha\}$. Using the symplectic form we have the invariant $d\xi^a \omega_{ab}d\nu^b = dq^\alpha dp_\alpha$. Under the kinematic assumption we have $dq^\alpha = dx^\alpha$ and $dp_\alpha = mg_{\alpha\beta}du^\beta + \mathfrak{q} A_\alpha$. We have $d\xi^a \omega_{ab}d\nu^b = dx^\alpha m g_{\alpha\beta} du^\beta + dx^\alpha \mathfrak{q} A_\alpha$.
Since the two terms have to match at each point and the symplectic form has the same components at each point, can we constrain the change of the components of $g_{\alpha\beta}$? The general idea would be that components of $g_{\alpha\beta}$ may have to change in space/time in coordination with $A_\alpha$ so as to make $d\xi^a \omega_{ab}d\nu^b = dq^\alpha dp_\alpha$ remain the same. Note that derivatives in $q^\alpha$ are taken at constant $p_\alpha$ while derivatives in $x^\alpha$ are taken at constant $u^\alpha$.
\section{Thermodynamics}
\subsection{Process entropy}
The key to recovering thermodynamics is finding a definition of entropy that applies in very general cases and recovers the usual definition. Instead of using the logarithm of the count of states, we use the logarithm of the count of possible evolutions. That is, the ways a system can evolve under a specific process. The entropy of the system is automatically relativistic (i.e.~we are essentially counting ``worldlines'' of the overall system in its state space) and is process dependent (i.e.~contextual).
In the case of deterministic and reversible evolution, the count of states is equivalent to the count of evolutions, and therefore the usual definition is recovered. In the case of stochastic steady state over continuous time, that is when the probability distribution stabilizes, the system will traverse infinitely many states within a small time difference $dt$. The count of evolutions, then, can be shown to reduce to the permutations of infinite sequences which recovers the Gibbs/Shannon entropy.
As for the behavior of entropy, the idea is that for a specific process, the state at a particular time identifies a set of possible evolutions. This would be the entropy of that state. Over the continuum, where states are points, the entropy would become a density of the count of evolutions. In essence, the entropy of a system at a particular time tells us how much or how little the evolution is constrained. In other words, it tells us how much the system is expected to fluctuate. As time evolves, the state changes, and the count of evolutions changes as well. If the evolution is deterministic, the evolutions can never split, in the sense that all the evolutions that end up in a particular state must all go to another state. This means that for a deterministic process the count can never decrease. If the evolution is deterministic and reversible, then the count must stay the same. This recovers the feature of entropy to be a non-decreasing quantity, which is conserved during reversible processes.
If the evolution allows equilibria, the evolutions will concentrate around states of equilibria. Given that states cannot go out of equilibrium once it has been reached, the count of evolutions is maximized at equilibrium. This recovers another feature of entropy.
Lastly, if two systems are independent, the way one evolves does not constrain the other. The total count of evolutions, then, is simply the product of the count of evolutions of the two systems. Since the entropy is the logarithm of the count of evolutions, it sums over independent systems. This recovers the last property of entropy.
\subsection{Equation of state}
If we study the space of equilibria, each state will have a well defined entropy. Therefore we have an equation of state $S(\xi^a)$ where $\xi^a$ form a set of variables that fully identify the state. Moreover, as noted before, entropy is additive under system composition of independent systems.
In a process with equilibria different evolutions must converge to the same final state, which means the process entropy increases and is maximized at equilibria. This gives the general idea that entropy increases during an irreversible process. These results are therefore valid in general, no matter what type of system is being described.
To find thermodynamics specifically, we need an additional set of assumptions. First, all states are equilibria. Second, all state variables $\xi^a$ are additive under system composition. Third, one of them, which we call internal energy $U$, is conserved under any evolution, including irreversible evolution. We can then write the equation of state as $S(U, x^i)$ and define the following quantities:
\begin{equation}
\begin{aligned}
\frac{\partial S}{\partial U} &= \beta = \frac{1}{k_B T} \\
\frac{\partial S}{\partial x^i} &= - \beta X_i
\end{aligned}
\end{equation}
We can then express the differentials as:
\begin{equation}
\begin{aligned}
dS &= \frac{\partial S}{\partial U} dU + \frac{\partial S}{\partial x^i} dx^i = \beta dU - \beta X_i dx^i \\
dU & = T k_B dS + X_i dx^i
\end{aligned}
\end{equation}
This is essentially Gibbs' approach to thermodynamics.
\subsection{Thermodynamic laws}
To recover the laws, we need a few more definitions. We define a reservoir $R$ as a system for which the internal energy $U_R$ is the only state variable and the state entropy $S_R$ is a linear function of $U_R$. That is, $\frac{\partial S_R}{\partial U_R} = \beta_R = \frac{1}{k_B T_R}$ is a constant. We call heat $Q=-\Delta U_R$ the energy lost by the reservoir during a transition.
We define a purely mechanical system $M$ as a system for which the state entropy is zero for each state. That is, $S_M(U_M, x^i_M) = 0$. We call work $W = \Delta U_M$ the energy acquired by a purely mechanical system during a transition.
Now, consider a composite system made of a generic system $A$, a reservoir $R$ and a purely mechanical system $M$. Consider a transition where we go to a new equilibrium. Since energy is additive under system composition, let us call $U$ the total energy. Since energy is conserved we have:
\begin{equation}
\begin{aligned}
\Delta U &= 0 = \Delta U_A + \Delta U_R +\Delta U_M = \Delta U_A - Q + W \\
\Delta U_A &= Q - W
\end{aligned}
\end{equation}
Since entropy is extensive, let us call $S$ the total entropy. Since the process is going to an equilibrium, the entropy can only increase. We have:
\begin{equation}
\begin{aligned}
0 &\leq \Delta S = \Delta S_A + \Delta S_R +\Delta S_M = \Delta S_A + \beta_R \Delta U_R + 0 = \Delta S_A + \frac{-Q}{k_B T_R} \\
k_B \Delta S_A &\geq \frac{Q}{T_R}
\end{aligned}
\end{equation}
\section{Quantum mechanics and irreducibility}
Quantum mechanics can be recovered by swapping reducibility with irreducibility as shown in diagram \ref{fig_quantum_diagram}, which can be used as a guide throughout this section.
\begin{figure}[h]
\includegraphics[width=\columnwidth]{images/QuantumDiagram.png}
\caption{Assumptions for quantum mechanics}\label{fig_quantum_diagram}
\end{figure}
The assumptions lie on the left column. Each assumption leads to one or two key insights that progressively lead to the physical concepts in the middle column. Each of these is then mapped to its corresponding formal framework on the right. Note that ``quasi-static process'' and ``conserved density'' both independently lead to the same result of ``unitary evolution''.
\subsection{Irreducibility}
The state space of quantum mechanics can be recovered under the:
\begin{assump}[Irreducibility]
The state of the system is irreducible. That is, giving the state of the whole system says nothing about the state of its parts.
\end{assump}
Under this assumption the state of the system is automatically an ensemble over the state of the parts as preparation of the whole leaves the parts unspecified. For the same reason, the entropy of these ensembles must be the same, or some ensembles would provide more or less information about the parts. The whole task, then, is to characterize these ensembles without making specific assumptions on the parts.
Let $\mathcal{C}$ be the state space of the irreducible system. Let us call fragment a part of the irreducible system. The state of a fragment will be associated with a random variable uniformly distributed over the possible fragment states. As discussed in the context of classical mechanics, distributions over states must be invariant and symplectic manifolds are the only manifolds over which invariant distributions can be defined. As we cannot say anything about the state of the fragments, the dimensionality of this manifold must be irrelevant as long as it is even dimensional. For simplicity, we can choose a two-dimensional one. Therefore we are interested in the space of bi-dimensional uniform distributions formed by a pair of two random variables $A$ and $B$.
The values of the variables themselves are not relevant, as they are not physically accessible by assumption. However, the size of the system $\mu = \int \rho dA \wedge dB$ is relevant. Without loss of generality, we can rescale $A$ and $B$ such that the density $\rho$ is not only uniform but unitary: $\rho = 1$. This way the size of the system is directly proportional to the area covered by the random variables. In other words: the more fragments there are, the more each fragment can swap its state with another without changing the whole, the more uncertainty there is on the state of the fragment, the higher the variance of the random variables.
Since only linear transformations will preserve the uniform distribution, we look to those. These are translations, stretches and rotations. Translations do not lead to other physically distinguishable states since the exact values of $A$ and $B$ are not physically accessible. Stretching of the distribution will correspond to an increase of the size of the system, which is physically accessible. However, only the stretching of the area is of interest. So, without loss of generality, we can set $\sigma_A = \sigma_B = \sigma$ and we have $\mu \propto \sigma^2$. Rotations just change the correlations which, by themselves, are not physically accessible. However, under addition the correlations still result in differences in variance and, indirectly, the size of the system, and therefore are physically interesting. The space of transformations is therefore given by two parameters $a$ and $b$ such that:
\begin{equation}
\begin{aligned}
C &= a A + b B \\
D &= -b A + a B
\end{aligned}
\end{equation}
Equivalently, we can use the complex number $c = a + \imath b$ to characterize the transformation, which we can note as $\tau(c)$. The increase/decrease in size is given by $a^2 + b^2 = (a - \imath b) (a + \imath b) = c^* c$ and the change in correlation is given by the Pearson correlation coefficient $\rho_{A,\tau(c) A} = \cos \arg c$.
Putting it all together, we can characterize the state space $\mathcal{C}$ with a complex vector space. The linear combination represents the mixing of the different stochastic descriptions. Two vectors that only differ by a total phase are physically equivalent since a global change of correlation does not change the distribution.
We can define a scalar product $\langle \cdot | \cdot \rangle$ where the square norm induced corresponds to the size of the system (or equivalently to the strength of the random variable) and the phase difference corresponds to the correlations (the Pearson correlation coefficient). To see this, note the formal equivalence between the variance and norm rules under linear composition:
\begin{equation}
\begin{aligned}
\sigma^2_{X+Y} &= \sigma^2_{X} + \sigma^2_{Y} + 2 \, \sigma_{X} \sigma_{Y} \rho_{X,Y} \\
|\psi+\phi|^2&=|\psi|^2 + |\phi|^2 + 2 |\psi||\phi|\cos(\Delta \theta)
\end{aligned}
\end{equation}
The quadratic form, again, reflects the fact that the size of the system is proportional to the variance of a random variable. Since the size of the system is fixed, we use unit vectors to represent actual states. The state of the system, then, is represented by a ray in a complex inner product space.
Lastly, we need to define an expectation operator that returns the average value for each physical quantity. This operator will have to be linear under linear combination of quantities:
\begin{equation}
E[aX + bY | \psi] = aE[X | \psi] + bE[Y | \psi].
\end{equation}
It will not be linear under linear combination of states:
\begin{equation}
E[X | \psi + \phi] \neq E[X | \psi] + E[X | \phi].
\end{equation}
Yet, it will have to be proportional to the increase in size and invariant under a total change in correlation: $E[X | \tau(c) \psi] = c^*c E[X | \psi]$. This leads us to associate to each physical quantity a linear Hermitian operator $X$ where $E[X | \psi] = \langle \psi | X | \psi \rangle$. An eigenstate $\psi_0$ of $X$ corresponds to a state where all the elements of the ensemble have exactly the same value. That is, $E[(X - \bar{x})^2 | \psi_0] = 0$.
Note that an inner product space can always be completed into a Hilbert space. This may, however, bring in objects that may not correspond to physical objects (i.e. infinite expectation for some quantities). In general, we believe it is better to regard the (possibly incomplete) inner product space as the physical state space and regard the completion as a mathematical device for calculation. For example, the Schwartz space seems more physically meaningful than the standard $L^2$ space as it gives finite expectation of all polynomials of position and momentum and, moreover, it is closed under Fourier transform.
\subsection{Process with equilibria}
The first type of process we consider is one with equilibria. The measurement process is recovered as a special case.
\begin{assump}[Process with equilibria]
Given an initial ensemble (i.e.~mixed state), the final ensemble is uniquely determined and remains the same if the process is applied again.
\end{assump}
Under this assumption, the process can be characterized by a projection operator. Let $\rho_1$ be the density matrix that characterizes a mixed state. Since the final mixed state must be uniquely determined by $\rho_1$, it will be $\mathcal{P}(\rho_1)$ for some operator $\mathcal{P}$. Similarly, if $\rho_2$ is another initial mixed state, its final operator will be $\mathcal{P}(\rho_2)$. Note that, given any observable $X$ the expectation $E[X|\rho_1] = \tr(X\rho_1)$ is the trace of $X\rho_1$. Similarly $E[X|\mathcal{P}(\rho_1)] = \tr(X \mathcal{P}(\rho_1))$.
We can always create statistical mixtures of the ensembles and we must have $E[X|a \rho_1 + b \rho_2] = a E[X|\rho_1] + b E[X|\rho_2]$ since these are classical mixtures. But since these are classical mixtures, the final state will also need to obey $E[X|a \mathcal{P}(\rho_1) + b \mathcal{P}(\rho_2)] = a E[X|\mathcal{P}(\rho_1)] + b E[X|\mathcal{P}(\rho_2)]$ for all possible $X$. Which means $\mathcal{P}(a \rho_1 + b \rho_2) = a \mathcal{P}(\rho_1) + b \mathcal{P}(\rho_2)$. Therefore the operator $\mathcal{P}$ is a linear operator. Moreover, the process applied twice must lead to the same result, which means $\mathcal{P}(\mathcal{P}(\rho)) = \mathcal{P}(\rho)$ for any $\rho$. That is, $\mathcal{P}^2 = \mathcal{P}$. Therefore $\mathcal{P}$ is a projection.
Suppose, now, that we want to measure a quantity $X$. We want the final outcome, the final ensemble, to be determined by the initial state, the initial ensemble. We also want the measurement to be consistent in the sense that, if it is repeated immediately after, it should yield the same result. Therefore the process will be a projection. We will also want that the process does not distort the quantity. That is, $E[X|\rho] = E[X|\mathcal{P}(\rho)]$. This means that the eigenstates of $X$ will correspond to equilibria of the process. Moreover, subsequent measurements must give the same value, not just the same mixture. That is, if $X_1$ is the random variable after the first instance of the process and $X_2$ is the random variable after the second instance, $P(X_2 = x| X_1 = x ) = 1$. This means that $E[(X - \bar{x})^2|\mathcal{P}(\rho)] = 0$ which means the eigenstates of $X$ are the only equilibria.
The measurement process is therefore simply a special case of a process with equilibria.
\subsection{Deterministic and reversible evolution}
The second type of process we consider is one that is deterministic and reversible, which is the same as assumption \ref{assum_detrev}.
Under this assumption, the process can be characterized by unitary evolution (i.e.~the Schr\"odinger equation). There are multiple different ways to see this. The first relates to the more general idea that all deterministic and reversible processes must be isomorphisms in the category of states. Since the state space is an inner product space, the isomorphism is unitary evolution.
The second, is that if there is a set of quantities $X_0$ at time $t_0$ that fully identify the state (i.e. the state is the only eigenstate of those quantities), then there must be a corresponding set of quantities $X_1$ that fully identify the state at time $t_1$. This means that the evolution maps basis to basis. Moreover, given the linearity of statistical mixtures, this will also mean that a statistical distribution over $X_0$ will have to map to the same distribution over $X_1$. Therefore the evolution must map linear combinations of that basis to the same linear combination. The evolution is a linear operator. Since the total size of the irreducible system cannot change, the operator must be unitary.
The third, is by constructing a quasi-static process from processes with equilibria, much like one does in thermodynamics. The idea is that we have an infinitesimal time step, an initial state $\psi_t$ and a final state $\psi_{t+dt}$. We want $P(\psi_{t+dt} | \psi_t ) = 1$. This means that $|\langle \psi_{t+dt} | \psi_{t} \rangle|^2 = 1$. This can happen only if the difference between initial and final states is infinitesimal. That is, $\langle \psi_{t+dt} | \psi_{t} \rangle = 1 + \imath \epsilon dt$ where $\epsilon$ is a real number. Therefore, by convention, we can write $| \psi_{t+dt} \rangle = \left( I + \frac{H dt}{\imath \hbar} \right) | \psi_{t} \rangle$ where $H$ is a Hermitian operator.
Putting these perspectives together, time evolution is a unitary operator which can be written as $U=e^{\frac{H\Delta t}{\imath \hbar}}$. If we start in an eigenstate of $X$, that is $X | \psi_t \rangle = x_0 | \psi_t \rangle$ we will end in an eigenstate $\hat{X} | \psi_{t + \Delta t} \rangle = x_0 | \psi_{t + \Delta t} \rangle$ of another operator $\hat{X} = e^{\frac{H\Delta t}{\imath \hbar}} X e^{- \frac{H\Delta t}{\imath \hbar}}$.
In fact:
\begin{equation}
\begin{aligned}
e^{\frac{H\Delta t}{\imath \hbar}} X e^{- \frac{H\Delta t}{\imath \hbar}}| \psi_{t + \Delta t} \rangle
&= e^{\frac{H\Delta t}{\imath \hbar}} X e^{- \frac{H\Delta t}{\imath \hbar}} U | \psi_t \rangle \\
&= e^{\frac{H\Delta t}{\imath \hbar}} X e^{- \frac{H\Delta t}{\imath \hbar}} e^{\frac{H\Delta t}{\imath \hbar}} | \psi_t \rangle \\
&= e^{\frac{H\Delta t}{\imath \hbar}} X | \psi_t \rangle \\
&= e^{\frac{H\Delta t}{\imath \hbar}} x_0 | \psi_t \rangle \\
&= x_0 U | \psi_t \rangle \\
&= x_0 | \psi_{t + \Delta t} \rangle
\end{aligned}
\end{equation}
This is consistent with assuming there is a quasi-static process that, at every $t$, has equilibria identified by $e^{\frac{H (t - t_0)}{\imath \hbar}} X e^{- \frac{H (t - t_0)}{\imath \hbar}}$. Note that, unlike thermodynamics, the equilibria during the evolution are not set by external constraints but by the system itself. That is, $X$ depends on the initial state of the system.
In this light, the measurement processes and the unitary processes can be seen as particular cases of the same type of processes, those with equilibria, which are defined as a black-box from initial to final state. This is consistent with the irreducibility assumption as the inability to describe the dynamics of the parts implicitly assumes that the dynamics of the parts is at equilibrium and sets a time-scale under which the further description of the system (i.e. non-equilibrium dynamics) would require describing the internal dynamics.
\def\eqgran{\doteq}
\def\finer{\leqdot}
\def\nfiner{\nleqdot}
\def\coarser{\geqdot}
\def\ncoarser{\ngeqdot}
\def\sfiner{\lessdot}
\def\scoarser{\gtrdot}
\chapter{Physical mathematics}
This chapter presents the areas that still need to be covered to conclude the general mathematical theory of experimental science and a summary of the preliminary work done on them.
\section{Experimental verifiability}
This first part is already well developed and has been presented in chapters one to three. Possible improvements are discussed in section \ref{sec:general_theory_extensions}.
\section{Informational granularity}
The general goal of this part is to recover elements of measure theory, differential geometry, probability theory and information theory. The central theme is the ability to compare and then quantify the granularity of the description provided by different statements. The idea is to have a single unified structure which can be, in some cases, reduced to the more familiar mathematical structures.
\subsection{Statement fineness}
Conceptually, we want to be able to compare two statements to see which one provides a more refined description, which one provides more information. For this, we need to establish a new axiom.
Note that a theoretical domain $\tdomain$ comes with a partial order $\narrower$ that indicates whether one statement gives a \textbf{narrower}, more specific, description than the other. For example:
\begin{itemize}
\item \statement{The position of the object is between 0 and 1 meters} $\narrower$ \statement{The position of the object is between 0 and 1 kilometers}
\item \statement{The fair die landed on 1} $\narrower$ \statement{The fair die landed on 1 or 2}
\item \statement{The first bit is 0 and the second bit is 1} $\narrower$ \statement{The first bit is 0}
\end{itemize}
In these cases, the first statements are ``contained'' in the second ones, which are more general.
We need to define an additional preorder $\finer : \tdomain \times \tdomain \to \mathbb{B}$ that compares two statements and tells us if the first provides a description with finer granularity than the second. Saying $\stmt_1 \finer \stmt_2$ means that the description provided by $\stmt_1$ is \textbf{finer}, gives more information, is more precise, than the description provided by $\stmt_2$. For example:
\begin{itemize}
\item \statement{The position of the object is between 0 and 1 meters} $\finer$ \statement{The position of the object is between 2 and 3 kilometers}
\item \statement{The fair die landed on 1} $\finer$ \statement{The fair die landed on 3 or 4}
\item \statement{The first bit is 0 and the second bit is 1} $\finer$ \statement{The third bit is 0}
\end{itemize}
In these cases, the first statement may not be contained or overlap with the second. The existence of this operator and its properties would be an additional axiom. Fineness is a preorder, rather than an order, because it does not satisfy antisymmetry: if $\stmt_1 \finer \stmt_2$ and $\stmt_2 \finer \stmt_1$ then it is not necessarily true that $\stmt_1 \equiv \stmt_2$. In that case, we will say that the two statements are \textbf{equigranular}, noted $\stmt_1 \eqgran \stmt_2$.
Note how statements about geometry, probability and information all satisfy the same concept. In fact, each of these structures will generate a preorder on the statements. The general question is what are the necessary and sufficient conditions on the preorder to be able to recover those structures.
\subsection{Measure theory}
Conceptually, a measure allows one to assign a size to a set. For us, a theoretical set is really a statement, so we want to assign sizes to statements that represent the coarseness of the description they provide.
The construction should, roughly, proceed as follows. Let $\tdomain_X$ be a theoretical domain. We select a unit statement $\stmt[u] \in \tdomain_X$. We define, in some way, the set $\tdomain_{\stmt[u]} \subseteq \tdomain_X$ which contains all statements that are comparable to $\stmt[u]$. We then try and construct a measure $\mu_{\stmt[u]} : \tdomain_{\stmt[u]} \to \mathbb{R}$ such that $\mu_{\stmt[u]}(\stmt[u]) = 1$. By a measure, we mean that $\mu_{\stmt[u]}$ is additive over incompatible statements (i.e. disjoint sets of possibilities). That is, if $\stmt_1 \ncomp \stmt_2$, we have $\mu_{\stmt[u]}(\stmt_1 \OR \stmt_2) = \mu_{\stmt[u]}(\stmt_1) + \mu_{\stmt[u]}(\stmt_2)$. We want the measure to respect the fineness preorder, to be monotonic. That is, if $\stmt_1 \finer \stmt_2$ then $\mu_{\stmt[u]}(\stmt_1) \leq \mu_{\stmt[u]}(\stmt_2)$.
Originally, we thought that these measures would have to be always additive and therefore we started adding suitable axioms on fineness. However, we realized that, in the context of quantum mechanics, the measure cannot be additive if it has to agree with the von Neumann entropy. Worse, it is not even monotonic (i.e. a broader statement is not necessarily coarser). More conceptual work needs to be done to understand the issue.
Note that we have essentially one measure for each equivalence class defined by fineness. This is intended. One reason a single measure is not sufficient for our work is because we need to compare statements of ``different infinities''. If we have a single measure, we can only compare objects with a finite measure. All objects with zero measure (or infinite measure) are indistinguishable. For example, we want to say:
\begin{itemize}
\item $\stmt_1$ = \statement{The horizontal position of the object is exactly 0 meters}
\item $\stmt_2$ = \statement{The horizontal position of the object is exactly 1 or 2 meters}
\item $\stmt_3$ = \statement{The horizontal position of the object is between 0.5 and 1.5 meters}
\item $\stmt_4$ = \statement{The horizontal position of the object is between 1.5 and 3.5 meters}
\item $\stmt_1 \finer \stmt_2 \finer \stmt_3 \finer \stmt_4$
\item $\stmt_1 \ncoarser \stmt_2 \ncoarser \stmt_3 \ncoarser \stmt_4$
\end{itemize}
Fineness may also capture the concept of physical dimension. In fact, two descriptions in the same units are ``finitely comparable'' in the sense that one gives a finer description than the other by a finite factor. Descriptions of different units are either ``infinitely comparable'' (e.g. areas are always bigger than lengths) or not comparable (e.g. position and momentum). Consider a two dimensional phase space of a classical system. Points should be comparable and in fact should be equigranular $\eqgran$ so that we can compare sets of finitely many points. Areas are also comparable to each other, and are comparable to points (i.e. they are infinitely bigger). However, vertical lines (i.e. ranges in momentum alone) are not comparable to horizontal lines (i.e. ranges in position alone). Symplectic geometry, in fact, gives a size to areas and not to lines. Mathematically, this should be clarified when one is trying to define the domain of the measure $\tdomain_{\stmt[u]}$.
\subsection{Probability}
Conceptually, probability is recovered as a measure restricted to a particular subset. The idea is that you take two statements, such as \statement{the die landed on 2} given that \statement{the die has 6 sides and it is fair}, and you ask what fraction of the possibilities compatible with the second is also compatible with the first. This defines the conditional probability.
Let $\stmt_1, \stmt_2 \in \tdomain$ be two theoretical statements. Then the probability of $\stmt_2$ given $\stmt_1$ is
\begin{equation}
P(\stmt_2 | \stmt_1) = \mu_{\stmt_1}(\stmt_1 \AND \stmt_2) = \frac{\mu_{\stmt[u]}(\stmt_1 \AND \stmt_2)}{\mu_{\stmt[u]}(\stmt_1)}
\end{equation}
which quantifies the fraction of possibilities compatible with $\stmt_1$ that are also compatible with $\stmt_2$.
If we take the certainty $\certainty$ as a unit, we have a probability measure for the whole space. However, since we can take different statements as a unit, we will be able to distinguish between the following cases:
\begin{itemize}
\item $P($\statement{n is odd} $|$\statement{n is picked fairly from all integers}$)=1/2$
\item $P($\statement{n is between 0 and 9} $|$\statement{n is picked fairly from all integers}$)=0$
\item $P($\statement{n is 3} $|$\statement{n is picked fairly from all integers}$)=0$
\item $P($\statement{n is 3} $|$\statement{n is between 0 and 9}$\AND$\statement{n is picked fairly from all integers}$)=1/10$
\end{itemize}
\subsection{Differentiability}
We want to construct a notion of differentials and differentiability that is the same for all spaces, even infinite dimensional ones. When introducing derivatives, this is typically done by taking limits of differences, and therefore differentiability is the existence of those limits. In differential topology, this notion is used to define differentiability of manifolds in terms of differentiability of coordinates, and then differentials are defined as linear functions of vectors. That is, the differentials defined on the coordinates of a particular chart are technically not the same objects as the differentials defined on the space.
The idea is to define differentiability on the vector space structure alone. That is, given two vector spaces $V$ and $W$, a map $f: V \to W$ is differentiable if it becomes linear in the neighborhood. We would first define a differential as a sequence of vectors $\{v_i\}_{i=1}^{\infty} \in V$ such that there exists a vector $t \in V$ and a sequence of non-zero elements $\{a_i\}_{i=1}^{\infty} \in \mathbb{R}$ that converges to $0$ for which
\[ \lim\limits_{i \to \infty} \frac{v_i}{a_i} = t. \]
We call $t$ the \textbf{tangent vector} of the differential and $\{a_i\}_{i=1}^{\infty}$ its \textbf{convergence envelope}. Note that, given a sequence $v_i$, these are not unique. We note $dv[a_i \, t]$ the differential with its tangent vector and convergence envelope. One can show that every differential can be written as $v_i = a_i t_i$ where $t_i$ converges to $t$.
We can now study how a map $f: V \to W$ maps differentials. Given a sequence $\{v_i\}_{i=1}^{\infty} \in V$, we can define $w_i = f(v_i)$. If, additionally, we have a differential $dv[a_i \, t]$, we can define the sequence $\{w_i\}_{i=1}^{\infty} = \{f(v_i + a_i t_i) - f(v_i)\}_{i=1}^{\infty}$. Now, the observation here is that if the map is linear, the sequence $\{w_i\}_{i=1}^{\infty}$ will be a differential with tangent vector $f(t)$ and convergence envelope $a_i$. But any map that is locally linear will have the same property, given that differentials are local objects. Therefore we say $f$ is differentiable at $v_0 \in V$ if there exists a map $d_v f |_{v_0} : V \to W$ such that $\{w_i\}_{i=1}^{\infty} = dw[a_i \, d_v f |_{v_0}(t)]$.
From a preliminary study, this would work on any vector space, regardless of dimension or field (i.e. real, complex, rational, ...).
\subsection{Differential geometry/geometric measure theory}
In the reverse physics chapter about classical mechanics we have seen that forms can be understood as modeling additive functionals of subregions. We need to connect those ideas to the rest of the formal framework.
Conceptually, we want to assign quantities to regions instead of points. If we assume these quantities are additive, the idea is that we can decompose them into the sum of infinitesimal contributions at each point. Therefore the differential objects exist as the limit of infinitesimal decomposition. This, again, reflects the overall spirit of the project that compels us to start from physically well defined entities (in this case the quantities associated with finite regions) and derive the theoretical ones (in this case the infinitesimal contributions that are integrated).
Let $\tdomain_X$ be a theoretical domain and $U \in \Sigma_X$ a theoretical set. This represents the region associated to our measurement. Let $\tdomain_Y$ be a theoretical domain and $R \in \Sigma_Y$ a theoretical set. This represents the possible values found. Our starting point consists of statements like:
\begin{itemize}
\item \statement{the amount of mass inside volume $U$ is within range $R$}
\item \statement{the force applied to surface $U$ is within range $R$}
\item \statement{the energy used to move the object along the line $U$ is within range $R$}
\end{itemize}
These are finite precision statements of a quantity associated to a region of finite size.
The first step is to group statements within the same region $U$ into subdomains $\tdomain_{U\to Y}$. We can then show how the possibilities for each $\tdomain_{U\to Y}$ reduce to statements like:
\begin{itemize}
\item \statement{the amount of mass inside volume $U$ is precisely $y$}
\item \statement{the force applied to surface $U$ is precisely $y$}
\item \statement{the energy used to move the object along the line $U$ is precisely $y$}
\end{itemize}
These are infinite precision statements of a quantity associated to a region of finite size. We define $S \subseteq \Sigma_X$ as the type of region (i.e. volumes vs surfaces vs lines) upon which the functional is defined and therefore we have a functional $f : S \to Y$ which tells us the exact value of the quantity in each region.
Then we study the case where $f$ is a real linear $k$-functional, meaning:
\begin{itemize}
\item the possibilities $X$ are identified by a set of real values; that is, $X$ with the natural topology is a manifold
\item the domain is all $k$-dimensional surfaces $S^k$; that is, the submanifolds of dimension $k$
\item the co-domain is the reals; so we have $f : S^k \to \mathbb{R}$
\item the functional is additive over disjoint sets; that is, $f(U_1 \cup U_2) = f(U_1) + f(U_2)$ if $U_1 \cap U_2 = \emptyset$
\item the functional commutes with the limit; that is, $\lim\limits_{i \to \infty} f(U_i) = f(\lim\limits_{i \to \infty} U_i)$
\end{itemize}
Under these conditions (and possibly others) one can express the functional as a sum of infinitesimal contributions. That is, $f(U) = \int_U \omega(dU)$, where $\omega$ represents a suitable $k$-form.
Note that there is not a unique way to perform this decomposition. For example, if $f(U)$ is the total mass in the volume, $\omega(dU)$ is the density in the infinitesimal volume. If we change the density at a single point, the integral does not change and only the integral is physical. These are the types of issues that still need to be solved.
\textbf{Stokes' theorem and exterior derivatives.} One interesting application of this viewpoint is that we can understand things like Stokes' theorem, exterior derivative and the difference between closed and exact forms directly on the finite functionals.
Let $\partial : S^k \to S^{k-1}$ be the boundary operator that, given a surface $\sigma^k$, returns the boundary $\partial \sigma^k$ which is of dimension $k-1$. We have $\partial\partial\sigma = \emptyset$ for any surface of any dimensionality.
Let $F_k$ be the space of linear $k$-functionals. We can define the boundary functional operator $\partial : F_k \to F_{k+1}$ such that $\partial f(\sigma) = f(\partial \sigma)$. That is, given a functional that acts on $k$-surfaces we can always construct one that acts on $k+1$-surfaces by taking the boundary of the $k+1$-surface and giving it to the first functional. Note that $\partial\partial f(\sigma) = \partial f(\partial\sigma) = f(\partial\partial\sigma) = f(\emptyset) = 0$, so the boundary functional of the boundary functional is the null functional, the one that returns zero for every $k$-surface. What we should be able to prove is that if $\omega$ is the $k$-form associated with $f$, $d\omega$ is the $k+1$-form associated with $\partial f$. In other words, Stokes' theorem essentially becomes a definition of the boundary functional and the calculation of the expression for $d\omega$.
We say a surface is contractible if it can be reduced to a point with a continuous transformation. A functional is closed if it is zero for all closed contractible surfaces. It is exact if it is zero for all closed surfaces. All boundary functionals are exact since $\partial f (\partial \sigma) = f (\partial\partial \sigma) = 0$. The form associated to a closed functional will be closed while the form associated to an exact functional will be exact.
\section{States and processes}
The general goal of this part is to give general definitions of states and processes that are always valid and are captured by a fundamental mathematical framework. Different theories would then specialize these basic definitions for different circumstances.
\subsection{Processes}
A process is an experimental domain $\edomain[P]$ that contains all the possible statements of the systems under study for all possible times. We call evolutions the possibilities $E$ of the domain, as they represent the complete description of all systems at all times.
We define a time parameter $t \in T \subseteq \mathbb{R}$. We group all statements relative to a system of interest at a particular time into a time domain $\edomain_t$. We call snapshots the possibilities $X_t$ of each time domain. A possible trajectory is a sequence $\{x_t\}_{t \in T}$ such that $x_t \in X_t$ for all $t \in T$ and $e \narrower \bigAND\limits_{t \in T} x_t$ for some $e \in E$. That is, there is an evolution for which the system will be described by that sequence of snapshots.
A process is deterministic if for all possible trajectories $x_{t_0} \narrower x_{t_1}$ for all $t_0 \leq t_1$. A process is reversible if for all possible trajectories $x_{t_1} \narrower x_{t_0}$ for all $t_0 \leq t_1$. Recall that narrowness between the possibilities of two domains means there is an experimental relationship. Therefore, if the process is deterministic, we can write a causal relationship $f : X_{t_0} \to X_{t_1}$ such that $x_{t_0} \narrower f(x_{t_0})$.
Once we derive a measure $\mu_{\stmt[u]} : \tdomain[P]_{\stmt[u]} \to \mathbb{R}$, we can define the evolution entropy as $\log \mu_{\stmt[u]}$. As the measure is multiplicative for independent systems, the evolution entropy will be additive making it an extensive property. The evolution entropy of a system at a time is defined to be the evolution entropy $\log \mu_{\stmt[u]}(x_t)$ of the snapshot at that time. Under a deterministic process, the evolution entropy can never decrease: $\log \mu_{\stmt[u]}(x_{t_0}) \leq \log \mu_{\stmt[u]}(x_{t_1})$ since $x_{t_0} \narrower x_{t_1}$ for all $t_0 \leq t_1$ and therefore $\mu_{\stmt[u]}(x_{t_0}) \leq \mu_{\stmt[u]}(x_{t_1})$. If the process is also reversible, then $\log \mu_{\stmt[u]}(x_{t_0}) = \log \mu_{\stmt[u]}(x_{t_1})$.
These definitions give a very general setting to describe a process and already find a quantity that cannot decrease during deterministic evolution.
\subsection{States}
Conceptually, states represent a description of the system, and only of the system, regardless of time. Therefore the state space is not a set of statements, but a ``template'' for a set of statements that can be ``instantiated'' at different times.
The idea is that a state space $\mathcal{S}$ comes equipped with a function $\iota : \mathcal{S} \times T \to \tdomain[P]$ such that $\iota(\mathcal{S}, t) = \tdomain_t$. That is, it maps the state space and its statements to the particular time domain that represents the system at that particular time. Specifically, states of the system will be mapped to snapshots of the system.
The structure of the state space will not be, in general, isomorphic to each particular time domain. In a particular process at a particular time some states may not be accessible, so some states will be mapped to an impossibility. Or there may be correlations with other systems, so the snapshot will provide more information (will be narrower) than the states themselves.
The relationships defined on the state space will be equivalent to the ones in the time domain if and only if the time domain of the system is independent from the time domain of the other systems. In other words: the state space represents the system and its properties when the system is independent. This also means that, to be able to define a system, we need to have a process that renders it independent from other systems.
When the system is independent from all others, the description is coarser than in the case of when there are correlations. Note that to a coarser description is associated a higher process entropy. Processes that render the system independent are exactly the ones that maximize the process entropy. We can associate a state entropy to each state, which is the process entropy associated to that description when the system is independent.
While it is still not clear what can be derived and what must be imposed, the overall goal is to understand what assumptions are needed to construct state spaces. One result should be that processes that isolate the system are implicitly needed, which forms the basis of requiring entropy maximization. All states are therefore equilibria of those processes (i.e. symmetries of the group of processes). Conceptually, this maps well with all branches of physics as all state spaces come equipped with some structure which, in the end, is connected to entropy/probability/measure.
\section{Open questions and possible extensions}\label{sec:general_theory_extensions}
Here we note some thoughts and ideas about open problems and possible extensions to the general theory.
\subsection{Homogeneity of an experimental domain}
It may be interesting to characterize some notion of homogeneity that makes all possibilities in a domain ``equally verifiable'', that no possibility is ``special'' compared to the others in terms of experimental verifiability. For example:
\begin{itemize}
\item the ``extra-terrestrial life'' domain is not homogeneous because one possibility can be verified while the other cannot
\item the integers and reals are the only linearly ordered quantities where all contingent statements are the same experimentally: all decidable and none decidable
\item phase transitions are special, as knowing whether a system is in a mixed state is decidable, so a domain with phase transitions is not homogeneous
\end{itemize}
It is not clear how this notion should be implemented and how exactly it would be useful. It may give a reason to expect a complete domain (the residual possibility is the only one that is not compatible with any contingent verifiable statement, so the domain would not be homogeneous) and also that all possibilities are approximately verifiable (if one is able to prove that, in any domain, at least one possibility is approximately verifiable).
\subsection{Predictive relationships}
Another way to characterize relationships between domains could be in terms of predictions, what statements of one domain can tell about the other. That is, we give a theoretical statement on one and look for the best prediction (i.e. narrowest theoretical statement broader than the original) for the other.
For example, if a domain is independent from another, any theoretical statement should predict the certainty on the other. If a domain is dependent on another, any theoretical statement should predict an equivalent statement.
A possible approach. Let $\edomain_X$ and $\edomain_Y$ be two experimental domains and $\tdomain_X$ and $\tdomain_Y$ their respective theoretical domains. Now we construct the function $\pi: \tdomain_X \to \tdomain_Y$ such that given $\stmt_X \in \tdomain_X$ and $\stmt_Y \in \tdomain_Y$ such that $\stmt_X \narrower \stmt_Y$ we always have $\stmt_X \narrower \pi(\stmt_X) \narrower \stmt_Y$. In other words, it should map to the narrowest broader statement in $\tdomain_Y$.
In principle, we can even extend $\pi : \logCtx \to \tdomain_Y$ to be defined on the whole context. In that case, $\pi$ can be proven to be a projection. This map should be able to characterize the relationship between domains. For example, if $\pi(\tdomain_X) = \{ \certainty, \impossibility \}$ then the domains should be independent. If $\pi(\tdomain_X) = \tdomain_Y$ the domains should be dependent.
\subsection{Defining structures on experimental domains}
Some mathematical structures are defined on points (i.e. vector spaces, ordering) and others on their $\sigma$-algebras. In our context, verifiable statements are the only elements that are actually physical, therefore it would be nice to always define the structures on the experimental domain (i.e. the topology) and show that it induces a unique structure on the theoretical statements and possibilities (and vice-versa).
We have already implemented this approach in a couple of areas. Theoretical domains are constructed from experimental domains (see \ref{pm-vs-defTheoreticalDomain}) and so are the possibilities (see \ref{pm-vs-defPossibilities}). Theorem \ref{pm-cd-experimentalRelationshipTheorem} shows that causal relationship on the possibilities is equivalent to an inference relationship on the verifiable domain. Theorem \ref{pm-pq-domainOrderingTheorem} shows that ordering of the possibilities is equivalent to the ordering of the basis according to narrowness.
We need to understand how this can be achieved for other structures, such as measures, metrics, groups, vector spaces, inner products, ...
\subsection{Space of possible combined domains}
It should be possible to better characterize the space of all possible combined domains. As we show in \ref{pm-cd-relationshipDomainIsContinuousFunctions} that the space of the possible experimental relationships is the space of topologically continuous functions, there should be an analogue for the space of all possible combined domains. For example, one should be able to show that the combined domain is an immersion within the product topology. Is that the only constraint? How can that be characterized? Can we create an experimental domain to distinguish them?
\subsection{Limited precision}
One area we could explore for new physical ideas is what happens if we assume that the precision cannot be arbitrarily decreased. How is it different from the continuous case? Here are some preliminary ideas.
The limited precision case cannot simply lead to a discrete topology. The standard topology of the reals is not the limit of the integer topology since it is not discrete. Most likely, the limited precision case will need to have uncountable possibilities so that the limit to arbitrary precision can work well.
The main cause of confusion is that, in the continuous case, whether the precisions of two statements overlap determines whether the statements are compatible. For example, ``the position is between 0 and 1 meters'' and ``the position is between 2 and 3 meters'' are both incompatible and not overlapping. This cannot be the case for limited precision. The possibilities themselves must be incompatible with each other but some of them must overlap, or we would simply have a discrete topology. That is, suppose that 1 unit is the precision limit, the statements ``the position is between 0 and 1'' and ``the position is between 0.5 and 1.5'' are incompatible because if we verify one we cannot verify the other. If we could verify them both, we would measure at a smaller precision. So, overlapping cannot be defined in terms of incompatibility.
Whether two statements overlap cannot be determined through incompatibility but must be recovered from the precision of the disjunction. Suppose we have the following arbitrary precision statements.
\begin{description}
\item $\stmt_1=$\statement{the position is between 0 and 1 meter}
\item $\stmt_2=$\statement{the position is between 0.5 and 1.5 meter}
\item $\stmt_3=$\statement{the position is between 2 and 3 meter}
\end{description}
The precision associated to all statements will be one meter. The precision for $\stmt_1\OR\stmt_2$ will be one meter and a half while the precision for $\stmt_1\OR\stmt_3$ will be two meters. That is: the precision for non overlapping statements sums. It may even be the case that if the precision sums, statements must be incompatible but the converse is what fails.
This means that the measure we put on the possibilities cannot represent the precision anymore. That is, $d\mu \neq dx$. We can imagine a relationship like $dx^2 = d\mu^2 + 1$. This would both make the precision go to 1 when the measure goes to 0 and $dx \simeq d\mu$ for large $\mu$.
\ifdraft
\include{rp_QuantumMechanics}
\fi
\iffalse
\chapter{Level of detail}
\section{Granularity}
So far we have studied the properties and constructions that can be defined on top of logical consistency and experimental verifiability. We will now introduce another primary attribute of statements, the idea that we can compare the level of detail for the description they provide.
We will see that the granularity, the level of detail provided by a statement, cannot be defined in terms of the concepts we previously introduced. We will therefore introduce a new axiom which will allow us to determine which statements provide a finer description and which statement provide the same granularity.
Statement narrowness allows us to say that the statement \statement{the temperature is between 22 and 23 Celsius} is more precise than \statement{the temperature is between 20 and 25 Celsius} but it does not tell us how it compares to \statement{the temperature is between 23 and 25 Celsius}. That is, it can compare only statements that are fully contained in one another, and not statements that are overlapping or incompatible. Ideally, we want to say that \statement{the temperature is between 23 and 25 Celsius} is coarser than \statement{the temperature is between 22 and 23 Celsius}. Saying that the first statement, in fact, is true constrains us to more possibilities than saying that the second is true. It would seem we simply need to keep track of quantity intervals or possibility counts, but this does not work. Let's understand what the problems are.
We may think that, at least in the discrete case, we could simply count the possibilities compatible with each statement: the more possibilities, the coarser the statement. If two statements are the disjunctions of the same number of possibilities they are of the same granularity. This does not always work. For example, suppose we have a bowl that can contain apples. The domain that counts how many apples are in the bowl will have \statement{there are 2 apples in the bowl} and \statement{there are 5 apples in the bowl} as possibilities. We would be tempted to say they provide the same level of description. Now suppose we extend the domain to count how many of the apples are green. This will have \statement{there are 2 apples in the bowl 1 of which is green} and \statement{there are 5 apples in the bowl 3 of which are green} as possibilities. We would be tempted to say that these too provide the same level of description as each other. But because the two quantities are not independent (i.e. you cannot have more green apples than apples in the bowl) this cannot work. The statement \statement{there are 2 apples in the bowl} would be equivalent to the disjunction of 3 possibilities in the combined domain: 0, 1 or 2 green apples. The statement \statement{there are 5 apples in the bowl} would be equivalent to the disjunction of 6 possibilities in the combined domain: 0, 1, 2, 3, 4 or 5 green apples. Therefore, it would seem that the second statement covers twice as many cases as the first, so it should be coarser than the first. But if we counted the possibilities in the first domain they were equigranular. So the issue is that, depending on the domain, the same statement can break up into a different number of possibilities. Something else needs to tell us which possibilities provide the same level of description.
%TODO: add picture for the apple example
In the continuous case, simply using the cardinality of the possibilities would lead us to say that \statement{the temperature is between 22 and 23 Celsius} and \statement{the temperature is between 20 and 25 Celsius} give us similar description: they both correspond to continuously many possibilities. We may think that using the numeric interval would work, but this also fails in unexpected ways. For example, we may be tempted to say that \statement{the horizontal position of the ball is between 0 and 1 meters} and \statement{the vertical position of the ball is between 0 and 1 meters} provide the same level of description, that they are, as we'll say, equigranular. But suppose the ball is constrained by walls, such that the horizontal position must be within 0 and 1 meters. Then, in this case, the first statement will always be true and tells us nothing about the system. Saying \statement{the vertical position of the ball is between 0 and 1 meters} will be equivalent to saying \statement{the vertical position of the ball is between 0 and 1 meters and the horizontal position is between 0 and 1 meters}. In this case, the second statement is narrower than the first and therefore must correspond to a finer description. As another example, suppose we give the position of a ball in polar coordinates over an infinite plane. The statement \statement{the radial distance is between 0 and 100 meters} corresponds to a disk of radius 100 meters. The statement \statement{the angle is between 0 and $\pi$} corresponds to half a plane. The first statement, then, is infinitely finer than the second, even though the numerical values seem to have a finite ratio. Numerical intervals, then, don't tell the whole story and can be misleading.
%TODO: add picture for position example
The point is that comparing the granularity of two statements cannot be defined simply on the concepts we have already introduced. It depends on the semantic content and the relationships defined by the context. It requires additional information. It requires an additional axiom.
We will assume that each logical context $\logCtx$ allows us to compare two statements $\stmt_1$ and $\stmt_2$ and say whether the description of one is finer, more detailed, provides more information, than the other. In that case, we would write $\stmt_1 \finer \stmt_2$ meaning the first statement is finer than the second. The finer statement will be the ``smaller'' one. Note the dot in the symbol to remind ourselves we are comparing the granularity of the statements. Note that we can still have two statements, like \statement{this animal is a cat} and \statement{the speed of that particle is between 2 and 3 $m/s$}, that are not comparable: neither is finer than the other.
If two statements are comparable by narrowness, fineness should faithfully reflect the relationship. Therefore, if $\stmt_1$ is narrower than $\stmt_2$ then its description is at a finer level of detail and therefore $\stmt_1$ is also finer than $\stmt_2$. Conversely, if $\stmt_1$ is strictly narrower than $\stmt_2$, then $\stmt_2$ cannot be finer than $\stmt_1$. In the same way, fineness must be compatible with negation: if $\stmt_1$ is finer than $\stmt_2$ then $\NOT \stmt_2$ will be finer than $\NOT \stmt_1$. Defining the level of description of a statement, in fact, means defining the level of description of its negation as well. Additionally, the fineness relationship will have to be transitive: if $\stmt_1 \finer \stmt_2$ and $\stmt_2 \finer \stmt_3$ then $\stmt_1 \finer \stmt_3$. These are the only requirements we need to add to our framework.
% TODO: not clear we need this. We also require it to respect negation: if one description is finer than another, then the
\begin{mathSection}
\begin{axiom}\label{4_axiom_fineness}
Given two statements $\stmt_1, \stmt_2 \in \logCtx$ we say $\stmt_1$ is \textbf{finer} than $\stmt_2$ (noted $\stmt_1 \finer \stmt_2$) if the description it provides is at least at the same level of detail. Formally, a logical context $\logCtx$ comes equipped with a binary relationship $\finer : \logCtx \times \logCtx \to \Bool$ such that:
\begin{itemize}
\item it reflects narrowness; that is, if $\stmt_1 \narrower \stmt_2$ then $\stmt_1 \finer \stmt_2$ and if $\stmt_2 \sbroader \stmt_1$ then $\stmt_2 \nfiner \stmt_1$
\item it is compatible with negation; that is, if $\stmt_1 \finer \stmt_2$ then $\NOT \stmt_2 \finer \NOT \stmt_1$
\item it is transitive; that is, if $\stmt_1 \finer \stmt_2$ and $\stmt_2 \finer \stmt_3$ then $\stmt_1 \finer \stmt_3$.
\end{itemize}
\end{axiom}
\begin{prop}
Statement fineness satisfies the following properties:
\begin{enumerate}
\item reflexivity: $\stmt \finer \stmt$
\item transitivity: if $\stmt_1 \finer \stmt_2$ and $\stmt_2 \finer \stmt_3$ then $\stmt_1 \finer \stmt_3$
\end{enumerate}
and is therefore a preorder.
\end{prop}
\begin{proof}
For reflexivity we have $\stmt \narrower \stmt$ and therefore $\stmt \finer \stmt$. Transitivity is assured by axiom \ref{4_axiom_fineness}.
\end{proof}
\begin{coro}
Statement fineness is monotonic with respect to statement narrowness. That is, $\stmt_1 \narrower \stmt_2$ implies $\Id(\stmt_1) \finer \Id(\stmt_2)$ where $\Id : \logCtx \to \logCtx$ is the identity function over $\logCtx$.
\end{coro}
\begin{proof}
Monotonicity is guaranteed by axiom \ref{4_axiom_fineness}.
\end{proof}
\end{mathSection}
From this new axiom we define equigranularity, the notion that two statements provide the same level of description, the same information. We can show that equigranularity is an equivalence relationship which is less restrictive than statement equivalence. That is, two equivalent statements, like \statement{the temperature is between 0 and 1 Celsius} and \statement{the temperature is between 273.15 and 274.15 Kelvin}, are also equigranular but two equigranular statements, like \statement{the temperature is between 0 and 1 Celsius} and \statement{the temperature is between 1 and 2 Celsius}, are not equivalent.
We can also show that fineness is a partial order, not just on the statements, but on equivalent and equigranular classes. We can imagine partitioning the logical context into groups of statements that are equigranular, and fineness will provide a partial order for those sets. If we partition the logical context into groups of equivalent statements, each group will fall into an equigranular group, and will therefore be ordered by fineness as well.
\begin{mathSection}
\begin{defn}
Two statements are \textbf{comparable} if the level of detail of their description can be compared. Formally, $\stmt_1$ and $\stmt_2$ are comparable if $\stmt_1 \finer \stmt_2$ or $\stmt_2 \finer \stmt_1$. Given a set of statements $S \subseteq \logCtx$ and an element $\stmt[u] \in S$, we define $S^{\stmt[u]} = \{ \stmt \in S \, | \, \stmt \text{ comparable to } \stmt[u] \}$.
\end{defn}
\begin{defn}
Two statements are said to be \textbf{equigranular} (noted $\stmt_1 \eqgran \stmt_2$) if the description they provide is at the same level of detail. Formally, $\stmt_1 \eqgran \stmt_2$ if $\stmt_1 \finer \stmt_2$ and $\stmt_2 \finer \stmt_1$.
\end{defn}
\begin{coro}
If $\stmt_1 \eqgran \stmt_2$ then $\NOT \stmt_1 \eqgran \NOT \stmt_2$.
\end{coro}
\begin{proof}
From $\stmt_1 \eqgran \stmt_2$ we have $\stmt_1 \finer \stmt_2$ and $\stmt_2 \finer \stmt_1$ by definition. From \ref{4_axiom_fineness} we have $\NOT \stmt_2 \finer \NOT \stmt_1$ and $\NOT \stmt_1 \finer \NOT \stmt_2$ and therefore $\NOT \stmt_1 \eqgran \NOT \stmt_2$.
\end{proof}
\begin{prop}
Statement equigranularity satisfies the following properties:
\begin{itemize}
\item reflexivity: $\stmt \eqgran \stmt$
\item symmetry: if $\stmt_1 \eqgran \stmt_2$ then $\stmt_2 \eqgran \stmt_1$
\item transitivity: if $\stmt_1 \eqgran \stmt_2$ and $\stmt_2 \eqgran \stmt_3$ then $\stmt_1 \eqgran \stmt_3$
\end{itemize}
and is therefore an equivalence relationship.
\end{prop}
\begin{proof}
For reflexivity, we have $\stmt \narrower \stmt$ which implies $\stmt \finer \stmt$ and therefore $\stmt \eqgran \stmt$. For symmetry, $\stmt_1 \eqgran \stmt_2$ implies $\stmt_1 \finer \stmt_2 \finer \stmt_1$ which also implies $\stmt_2 \eqgran \stmt_1$. For transitivity, suppose $\stmt_1 \eqgran \stmt_2$ and $\stmt_2 \eqgran \stmt_3$. Then $\stmt_1 \finer \stmt_2 \finer \stmt_3 \finer \stmt_2 \finer \stmt_1$ which means $\stmt_1 \eqgran \stmt_3$.
\end{proof}
\begin{coro}
Statement equigranularity is also an equivalence relationship among equivalence classes of statements. That is, $\stmt_1 \eqgran \stmt_2$ for all $\stmt_1,\stmt_2 \in \logCtx$ such that $\stmt_1 \equiv \stmt_2$.
\end{coro}
\begin{proof}
Suppose $\stmt_1 \equiv \stmt_2$. Then $\stmt_1 \narrower \stmt_2 \narrower \stmt_1$ which means $\stmt_1 \finer \stmt_2 \finer \stmt_1$ and $\stmt_1 \eqgran \stmt_2$.
\end{proof}
\begin{coro}
Statement fineness induces a partial order over the equigranular classes of statements. That is, let $\logCtx_{/\eqgran}$ be the quotient of $\logCtx$ by $\eqgran$, let $[\stmt] \in \logCtx_{/\eqgran}$ denote the equivalence class for $\stmt \in \logCtx$. Define $[\stmt_1] \finer [\stmt_2]$ if and only if $\stmt_1 \finer \stmt_2$. Then $\finer : \logCtx_{/\eqgran} \times \logCtx_{/\eqgran} \to \Bool$ is a partial order.
\end{coro}
\begin{proof}
For reflexivity, $[\stmt] \finer [\stmt]$ since $\stmt \finer \stmt$. For anti-symmetry, suppose $[\stmt_1] \finer [\stmt_2]$ and $[\stmt_2] \finer [\stmt_1]$, then $\stmt_1 \finer \stmt_2 \finer \stmt_1$ and therefore $\stmt_1 \eqgran \stmt_2$ which means $[\stmt_1] = [\stmt_2]$. For transitivity, $[\stmt_1] \finer [\stmt_2] \finer [\stmt_3]$ means $\stmt_1 \finer \stmt_2 \finer \stmt_3$, and therefore $\stmt_1 \finer \stmt_3$ and $[\stmt_1] \finer [\stmt_3]$.
\end{proof}
\end{mathSection}
This seemingly simple axiom is what allows us to open the door to disparate concepts such as geometry (comparing level of detail in terms of areas and volumes), information theory (comparing level of detail in terms of bits), probability theory (fraction of possibilities that correspond to one statement) and determinism and reversibility (how the level of detail changes through evolution). What we'll see is that all these different mathematical tools in the end are characterizing granularity in different situations.
It is important to understand why the partial order we are introducing, as a mathematical tool, is more general and more powerful than notions of distances, volume, information or probability that, in the end, quantify everything as a real number. Suppose we wanted to assign sizes to geometrical objects within a three dimensional space. We may assign a volume to each three dimensional shape, which would tell us which one is bigger than the other. If we extend this to two dimensional shapes, though, we would assign zero to all of them, so they would look all of the same size. If we assigned areas, we could now tell which surface is greater than the other, but now all volumes would be assigned infinity and all the lines would be assigned zero. There is a clear hierarchy here, all lines are smaller than all areas, which are all smaller than all volumes. Within each group we can compare with a finite ratio, but not across groups. A real number, then, can only compare objects within a group, while the relationships of bigger and smaller go across those groups. That is why an ordering relationship is more fundamental: we can use it to compare more cases. Real numbers are useful to quantify the ordering within a specific class of objects.
Also: partial order allows for some quantities to not be comparable.
\section{Next}
The general idea is that if two domains are such that each point can go to any other point through a relationship, then all points and all verifiable statements are granularity-comparable.
\begin{defn}
An experimental domain $\edomain_X$ is \textbf{uniform} if all possibilities are equigranular. That is, $x_1 \eqgran x_2$ for all $x_1, x_2 \in X$.
\end{defn}
\begin{defn}
Let $\stmt[u] \in \tdomain_X$ and let $\tdomain_X^{\stmt[u]}$ be the set of statements comparable to $\stmt[u]$. A \textbf{measure} on $\tdomain_X$ with unit $\stmt[u]$ is a map $\mu_{\stmt[u]} : \tdomain_X^{\stmt[u]} \to \mathbb{R}$ such that:
\begin{itemize}
\item $\mu_{\stmt[u]}(\stmt[u]) = 1$
\item $\mu_{\stmt[u]}(\stmt_1) \leq \mu_{\stmt[u]}(\stmt_2)$ if $\stmt_1 \finer \stmt_2$
\item if $\stmt_1 \ncomp \stmt_2$ then $\mu_{\stmt[u]}(\stmt_1 \OR \stmt_2) = \mu_{\stmt[u]}(\stmt_1) + \mu_{\stmt[u]}(\stmt_2)$
\end{itemize}
\end{defn}
\begin{prop}
Let $\edomain$ be a uniform experimental domain. Let $\stmt_1 \in \tdomain$ be a statement compatible with infinitely many possibilities. Then we can find $\stmt_2, \stmt_3 \in \tdomain$ such that $\stmt_2 \ncomp \stmt_3$, $\stmt_2 \eqgran \stmt_3$ and $\stmt_1 \equiv \stmt_2 \OR \stmt_3$.
\end{prop}
\begin{proof}
\end{proof}
\begin{thrm}
Let $\edomain$ be an experimental domain where all statements are comparable to each other. Then given $\stmt[u] \in \tdomain$ we can find $\mu_{\stmt[u]} : \tdomain \to \mathbb{R}$ such that:
\begin{itemize}
\item $\mu_{\stmt[u]}(\stmt[u]) = 1$
\item $\mu_{\stmt[u]}(\stmt_1) \leq \mu_{\stmt[u]}(\stmt_2)$ if and only if $\stmt_1 \finer \stmt_2$
\item if $\stmt_1 \ncomp \stmt_2$ then $\mu_{\stmt[u]}(\stmt_1 \OR \stmt_2) = \mu_{\stmt[u]}(\stmt_1) + \mu_{\stmt[u]}(\stmt_2)$
\end{itemize}
\end{thrm}
\begin{proof}
This can be posed as a question on measurable spaces. Imposing a measure on a sigma algebra tells us which sets are equal size. Can this be reversed? That is, if we impose which sets are equal size (compatibly with set inclusion), can we get a measure?
\end{proof}
\begin{coro}
The measure $\mu_{\stmt[u]}$ is unique only if all possibilities are equigranular.
\end{coro}
\begin{proof}
The issue is that if the possibilities are only comparable, then we can't tell how much the measure of one should be bigger than the other since we can't find finer statements to measure the difference. Therefore we can change the value associated to the possibilities without breaking the ordering. If all possibilities are equigranular, then the finest statements must be given all the same size.
\end{proof}
We can quantify the accuracy numerically only if the possibilities of the domain are equigranular.
Notes: we want to say we can define a measure on a set of comparable statements. In particular, we want all possibilities to be comparable, or we will not be able to say whether we can have a relationship between the points. In fact, if two possibilities are not comparable, they cannot be put in a causal relationship. We want verifiable statements to be comparable, or they cannot be put into an inference relationship.
The fact that each possibility is comparable with each verifiable statement is a consequence of the fact that every verifiable statement is narrower than at least one possibility.
\fi
\appendix
\part{Appendix}
\chapter{Reference sheets for math and physics}
\section{Set theory}
\begin{tabular}{p{0.2\textwidth} p{0.3\textwidth} p{0.5\textwidth}}
& Name & Meaning \\
\hline
$A = \{1,2,3\}$ & set & a collection of elements\\
\hline
$\mathbb{N} = \{0, 1, 2, \dots\}$ & natural numbers & the set of numbers one uses to count \\
\hline
$\mathbb{Z} = \{\dots, -1, 0, 1, \dots\}$ & integers & the set of all whole numbers \\
\hline
$\mathbb{Q}$ & rationals & the set of all fractions \\
\hline
$\mathbb{R}$ & reals & the set of numbers with infinite precision \\
\hline
$\mathbb{C}$ & complex & the set of numbers that represent a two dimensional vector or rotation \\
\hline
$a \in A$ & in & whether the element $a$ is contained in $A$ \\
\hline
$A \subseteq B$ & subset & a set that only contains elements of the other set\\
\hline
$A \subset B$ & proper subset & a set that only contains elements of the other set but not all of them; it is a subset but is not the same set\\
\hline
$A \supseteq B$ & superset & a set that contains all elements of the other set\\
\hline
$A \supset B$ & proper superset & a set that contains all elements of the other set but not just them; it is a superset but is not the same set\\
\hline
$A \cup B$ & union & the set of all elements contained in either set\\
\hline
$A \cap B$ & intersection & the set of all elements contained in both sets \\
\hline
$A \setminus B$ & subtraction & the set of elements in $A$ that are not in $B$ \\
\hline
$A^C$& complement & the set of all elements that are not in $A$ \\
& & it is equal to $U \setminus A$ where $U$ is the set of all elements, which depends on context \\
\hline
$A \times B$ & Cartesian product & the set of all ordered pairs $(a, b)$ with $a \in A$ and $b \in B$ \\
\hline
$2^A$ & power set & the set of all possible subsets of $A$ \\
\hline
\end{tabular}
\begin{tabular}{p{0.2\textwidth} p{0.3\textwidth} p{0.5\textwidth}}
& Name & Meaning \\
\hline
$f : A \to B$ & function & a map that for every element of $A$ returns an element of $B$ \\
\hline
& injective function & a function that maps distinct elements of $A$ to distinct elements of $B$ \\
\hline
$B^A$ & & the set of all possible functions $f : A \to B$ \\
\hline
$C(A,B)$ & & the set of all continuous functions $f : A \to B$ \\
\hline
\end{tabular}
\backmatter
\chapter[Credits]{\centering Credits}
\begin{table}[h]
\centering
\begin{tabular}{>{\raggedleft}p{0.5\textwidth} >{\raggedright\arraybackslash}p{0.5\textwidth}}
Created by: & Gabriele Carcassi \\
Written by: & Gabriele Carcassi and Christine A. Aidala \\
& \\
& \\
Subject-matter advisors (Math): \\ \textit{\footnotesize review prompted significant technical changes} & Mark Greenfield (Ch. II.1,II.2,II.3) \\
Additional subject-matter advisors (Math): \\ \textit{\footnotesize review prompted significant technical improvements} & Daniel Burns, Alejandro Uribe, Alexander Wilce (Ch. II.4) \\
(Phil): & Josh Hunt (Ch. II.1) \\
& \\
Subject-matter reviewers (Math): \\ \textit{\footnotesize review prompted technical fixes} & Sharif Velasquez (Ch. II.1), Bart Westra (Ch. II.3), Matt Insall, Demitri Lawrence, Junde Song (Ch. II.4) \\
& \\
& \\
Diagrams and figures: \\ \textit{\footnotesize contributed one or more} & Matteo Carcassi (Ch. I.1), Saja Gherri (Ch. II.1,II.2), Tobias Thrien (Ch. II.4) \\
& \\
& \\
Test readers: \\ \textit{\footnotesize reviewed a full chapter or more} & Chami Amarasinghe, Andre Antoine, Hamza Farooq, Alina Garcia, Saja Gherri, Uriah Israel, Micah Johnson, Sean Kelly, Dan McCusker, Pietro Monticone, Everardo Olide, Artem Omelchenko, Robert Rozite, Tobias Thrien \\
Additional test readers: \\ \textit{\footnotesize review prompted corrections and clarifications} & Josce Kooistra, Armin Nikkhah Shirazi, Ayla Rodriguez, Alex Takla, Allan Vanzandt \\
& \\
& \\
% Possible role definitions
%\multicolumn{2}{c}{{\LARGE \textbf{Additional consultants}}} \\
%\multicolumn{2}{c}{\emph{Occasional role in providing significant feedback that reshapes some ideas}} \\
%\multicolumn{2}{c}{{\LARGE \textbf{Consultants}}} \\
%\multicolumn{2}{c}{\emph{Continued role in providing significant feedback that reshapes some ideas}} \\
%\multicolumn{2}{c}{{\LARGE \textbf{Testing/Proof-reading}}} \\
%\multicolumn{2}{c}{\emph{Someone who reads drafts on a regular basis and provided useful feedback (i.e. typos or cause minor corrections)}} \\
%\multicolumn{2}{c}{{\LARGE \textbf{Additional testing}}} \\
%\multicolumn{2}{c}{\emph{Someone who occasional reads drafts on a regular basis and provided useful feedback (i.e. typos or cause minor corrections)}} \\
\end{tabular}
\end{table}
\end{document}